Completed
Push — master ( 56f604...098737 )
by Lars
02:55
created

UTF8::strtoupper()   D

Complexity

Conditions 9
Paths 29

Size

Total Lines 42
Code Lines 21

Duplication

Lines 42
Ratio 100 %

Code Coverage

Tests 0
CRAP Score 90

Importance

Changes 0
Metric Value
dl 42
loc 42
ccs 0
cts 0
cp 0
rs 4.909
c 0
b 0
f 0
cc 9
eloc 21
nc 29
nop 4
crap 90
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  /**
15
   * @var array
16
   */
17
  private static $WIN1252_TO_UTF8 = array(
18
      128 => "\xe2\x82\xac", // EURO SIGN
19
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
20
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
21
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
22
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
23
      134 => "\xe2\x80\xa0", // DAGGER
24
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
25
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
26
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
27
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
28
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
29
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
30
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
31
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
32
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
33
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
34
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
35
      149 => "\xe2\x80\xa2", // BULLET
36
      150 => "\xe2\x80\x93", // EN DASH
37
      151 => "\xe2\x80\x94", // EM DASH
38
      152 => "\xcb\x9c", // SMALL TILDE
39
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
40
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
41
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
42
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
43
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
44
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
45
  );
46
47
  /**
48
   * @var array
49
   */
50
  private static $CP1252_TO_UTF8 = array(
51
      '€' => '€',
52
      '‚' => '‚',
53
      'ƒ' => 'ƒ',
54
      '„' => '„',
55
      '…' => '…',
56
      '†' => '†',
57
      '‡' => '‡',
58
      'ˆ' => 'ˆ',
59
      '‰' => '‰',
60
      'Š' => 'Š',
61
      '‹' => '‹',
62
      'Œ' => 'Œ',
63
      'Ž' => 'Ž',
64
      '‘' => '‘',
65
      '’' => '’',
66
      '“' => '“',
67
      '”' => '”',
68
      '•' => '•',
69
      '–' => '–',
70
      '—' => '—',
71
      '˜' => '˜',
72
      '™' => '™',
73
      'š' => 'š',
74
      '›' => '›',
75
      'œ' => 'œ',
76
      'ž' => 'ž',
77
      'Ÿ' => 'Ÿ',
78
  );
79
80
  /**
81
   * Bom => Byte-Length
82
   *
83
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
84
   *
85
   * @var array
86
   */
87
  private static $BOM = array(
88
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
89
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
90
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
91
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
94
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
95
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
96
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
97
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
98
  );
99
100
  /**
101
   * Numeric code point => UTF-8 Character
102
   *
103
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
104
   *
105
   * @var array
106
   */
107
  private static $WHITESPACE = array(
108
    // NUL Byte
109
    0     => "\x0",
110
    // Tab
111
    9     => "\x9",
112
    // New Line
113
    10    => "\xa",
114
    // Vertical Tab
115
    11    => "\xb",
116
    // Carriage Return
117
    13    => "\xd",
118
    // Ordinary Space
119
    32    => "\x20",
120
    // NO-BREAK SPACE
121
    160   => "\xc2\xa0",
122
    // OGHAM SPACE MARK
123
    5760  => "\xe1\x9a\x80",
124
    // MONGOLIAN VOWEL SEPARATOR
125
    6158  => "\xe1\xa0\x8e",
126
    // EN QUAD
127
    8192  => "\xe2\x80\x80",
128
    // EM QUAD
129
    8193  => "\xe2\x80\x81",
130
    // EN SPACE
131
    8194  => "\xe2\x80\x82",
132
    // EM SPACE
133
    8195  => "\xe2\x80\x83",
134
    // THREE-PER-EM SPACE
135
    8196  => "\xe2\x80\x84",
136
    // FOUR-PER-EM SPACE
137
    8197  => "\xe2\x80\x85",
138
    // SIX-PER-EM SPACE
139
    8198  => "\xe2\x80\x86",
140
    // FIGURE SPACE
141
    8199  => "\xe2\x80\x87",
142
    // PUNCTUATION SPACE
143
    8200  => "\xe2\x80\x88",
144
    // THIN SPACE
145
    8201  => "\xe2\x80\x89",
146
    //HAIR SPACE
147
    8202  => "\xe2\x80\x8a",
148
    // LINE SEPARATOR
149
    8232  => "\xe2\x80\xa8",
150
    // PARAGRAPH SEPARATOR
151
    8233  => "\xe2\x80\xa9",
152
    // NARROW NO-BREAK SPACE
153
    8239  => "\xe2\x80\xaf",
154
    // MEDIUM MATHEMATICAL SPACE
155
    8287  => "\xe2\x81\x9f",
156
    // IDEOGRAPHIC SPACE
157
    12288 => "\xe3\x80\x80",
158
  );
159
160
  /**
161
   * @var array
162
   */
163
  private static $WHITESPACE_TABLE = array(
164
      'SPACE'                     => "\x20",
165
      'NO-BREAK SPACE'            => "\xc2\xa0",
166
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
167
      'EN QUAD'                   => "\xe2\x80\x80",
168
      'EM QUAD'                   => "\xe2\x80\x81",
169
      'EN SPACE'                  => "\xe2\x80\x82",
170
      'EM SPACE'                  => "\xe2\x80\x83",
171
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
172
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
173
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
174
      'FIGURE SPACE'              => "\xe2\x80\x87",
175
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
176
      'THIN SPACE'                => "\xe2\x80\x89",
177
      'HAIR SPACE'                => "\xe2\x80\x8a",
178
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
179
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
180
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
181
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
182
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
183
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
184
  );
185
186
  /**
187
   * bidirectional text chars
188
   *
189
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
190
   *
191
   * @var array
192
   */
193
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
194
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
195
    8234 => "\xE2\x80\xAA",
196
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
197
    8235 => "\xE2\x80\xAB",
198
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
199
    8236 => "\xE2\x80\xAC",
200
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
201
    8237 => "\xE2\x80\xAD",
202
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
203
    8238 => "\xE2\x80\xAE",
204
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
205
    8294 => "\xE2\x81\xA6",
206
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
207
    8295 => "\xE2\x81\xA7",
208
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
209
    8296 => "\xE2\x81\xA8",
210
    // POP DIRECTIONAL ISOLATE
211
    8297 => "\xE2\x81\xA9",
212
  );
213
214
  /**
215
   * @var array
216
   */
217
  private static $COMMON_CASE_FOLD = array(
218
      'ſ'            => 's',
219
      "\xCD\x85"     => 'ι',
220
      'ς'            => 'σ',
221
      "\xCF\x90"     => 'β',
222
      "\xCF\x91"     => 'θ',
223
      "\xCF\x95"     => 'φ',
224
      "\xCF\x96"     => 'π',
225
      "\xCF\xB0"     => 'κ',
226
      "\xCF\xB1"     => 'ρ',
227
      "\xCF\xB5"     => 'ε',
228
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
229
      "\xE1\xBE\xBE" => 'ι',
230
  );
231
232
  /**
233
   * @var array
234
   */
235
  private static $BROKEN_UTF8_FIX = array(
236
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
237
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
238
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
239
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
240
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
241
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
242
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
243
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
244
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
245
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
246
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
247
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
248
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
249
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
250
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
251
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
252
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
253
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
254
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
255
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
256
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
257
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
258
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
259
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
260
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
261
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
262
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
263
      'ü'       => 'ü',
264
      'ä'       => 'ä',
265
      'ö'       => 'ö',
266
      'Ö'       => 'Ö',
267
      'ß'       => 'ß',
268
      'Ã '       => 'à',
269
      'á'       => 'á',
270
      'â'       => 'â',
271
      'ã'       => 'ã',
272
      'ù'       => 'ù',
273
      'ú'       => 'ú',
274
      'û'       => 'û',
275
      'Ù'       => 'Ù',
276
      'Ú'       => 'Ú',
277
      'Û'       => 'Û',
278
      'Ü'       => 'Ü',
279
      'ò'       => 'ò',
280
      'ó'       => 'ó',
281
      'ô'       => 'ô',
282
      'è'       => 'è',
283
      'é'       => 'é',
284
      'ê'       => 'ê',
285
      'ë'       => 'ë',
286
      'À'       => 'À',
287
      'Á'       => 'Á',
288
      'Â'       => 'Â',
289
      'Ã'       => 'Ã',
290
      'Ä'       => 'Ä',
291
      'Ã…'       => 'Å',
292
      'Ç'       => 'Ç',
293
      'È'       => 'È',
294
      'É'       => 'É',
295
      'Ê'       => 'Ê',
296
      'Ë'       => 'Ë',
297
      'ÃŒ'       => 'Ì',
298
      'Í'       => 'Í',
299
      'ÃŽ'       => 'Î',
300
      'Ï'       => 'Ï',
301
      'Ñ'       => 'Ñ',
302
      'Ã’'       => 'Ò',
303
      'Ó'       => 'Ó',
304
      'Ô'       => 'Ô',
305
      'Õ'       => 'Õ',
306
      'Ø'       => 'Ø',
307
      'Ã¥'       => 'å',
308
      'æ'       => 'æ',
309
      'ç'       => 'ç',
310
      'ì'       => 'ì',
311
      'í'       => 'í',
312
      'î'       => 'î',
313
      'ï'       => 'ï',
314
      'ð'       => 'ð',
315
      'ñ'       => 'ñ',
316
      'õ'       => 'õ',
317
      'ø'       => 'ø',
318
      'ý'       => 'ý',
319
      'ÿ'       => 'ÿ',
320
      '€'      => '€',
321
      '’'      => '’',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $UTF8_TO_WIN1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $UTF8_MSWORD = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $ICONV_ENCODING = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $SUPPORT = array();
808
809 1
  /**
   * Creates an instance and triggers the detection of the server's UTF-8 support.
   *
   * INFO: the detection itself is done by UTF8::checkForSupport().
   */
  public function __construct()
  {
    UTF8::checkForSupport();
  }
816
817
  /**
   * Returns the character at the given position of a string, similar to $str[1]
   * but aware of multi-byte characters.
   *
   * INFO: an empty string is returned for empty input and for a negative position.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;
    $index = (int)$pos;

    // nothing can be accessed in an empty string or at a negative position
    if ($input === '' || $index < 0) {
      return '';
    }

    return self::substr($input, $index, 1);
  }
840 1
841
  /**
   * Returns the given string with the UTF-8 BOM in front of it.
   *
   * INFO: A string that already has a BOM is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // keep the input as it is when a BOM was detected
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
858
859
  /**
   * Convert a binary string (a sequence of "1" and "0") into a string.
   *
   * The bits are read as one big number, written as hexadecimal digits and then
   * packed into bytes. The conversion is done nibble by nibble, so the length of
   * the input is not limited by the precision of PHP's numeric types.
   *
   * INFO: Characters other than "0" and "1" are ignored. Leading zero bits do not
   *       change the result.
   *
   * @param mixed $bin <p>The binary digits, e.g. "0110100001101001".</p>
   *
   * @return string <p>The packed bytes, or an empty string if the input has no first character.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // Keep only the binary digits (base_convert() ignored other characters as well)
    // and drop leading zeros, because they do not change the numeric value.
    $bits = ltrim((string)preg_replace('/[^01]/', '', (string)$bin), '0');

    if ($bits === '') {
      // the value is zero, which is written as a single hex digit
      $hex = '0';
    } else {
      // left-pad to whole nibbles, so that every 4 bits map onto exactly one hex digit
      $bitCount = strlen($bits);
      $bits = str_pad($bits, $bitCount + (4 - $bitCount % 4) % 4, '0', STR_PAD_LEFT);

      $hex = '';
      foreach (str_split($bits, 4) as $nibble) {
        $hex .= dechex(bindec($nibble));
      }
    }

    return pack('H*', $hex);
  }
874 1
875
  /**
   * Returns the Byte Order Mark of UTF-8 (the three bytes 0xEF 0xBB 0xBF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
886 2
887
  /**
   * Alias of UTF8::chr_map(): both arguments are passed on unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
901
902
  /**
   * This method will auto-detect your server environment for UTF-8 support.
   *
   * The results are stored in self::$SUPPORT. The detection only runs once,
   * later calls return immediately.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // the constant only exists if "mbstring" is available, so check it before reading the ini-setting
    self::$SUPPORT['mbstring_func_overload'] = (bool)(
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // "transliterator_list_ids()" is part of "intl": calling it without the extension
    // would be a fatal error, so fall back to an empty list in that case
    self::$SUPPORT['intl__transliterator_list_ids'] = array();
    if (function_exists('transliterator_list_ids')) {
      self::$SUPPORT['intl__transliterator_list_ids'] = transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
940 7
941 7
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * If the encoding is exactly "UTF-8" and "\IntlChar" is supported, the result of
   * \IntlChar::chr() is returned directly. Otherwise the bytes are built manually,
   * converted into the requested encoding if needed, and cached per code point and encoding.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // static cache, only used if there is no support for "\IntlChar"
    static $generatedChars = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'UTF-8') {
      if (self::$SUPPORT['intlChar'] === true) {
        return \IntlChar::chr($code_point);
      }
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    if ((int)$code_point !== $code_point) {
      return null;
    }

    // the key is built from the original code point, before it is wrapped below
    $lookupKey = $code_point . $encoding;
    if (isset($generatedChars[$lookupKey])) {
      return $generatedChars[$lookupKey];
    }

    // wrap the code point into the range that can be expressed with up to 4 bytes
    $code_point %= 0x200000;

    if ($code_point < 0x80) {
      // 1 byte
      $byteValues = array($code_point);
    } elseif ($code_point < 0x800) {
      // 2 bytes
      $byteValues = array(
          0xC0 | ($code_point >> 6),
          0x80 | ($code_point & 0x3F),
      );
    } elseif ($code_point < 0x10000) {
      // 3 bytes
      $byteValues = array(
          0xE0 | ($code_point >> 12),
          0x80 | (($code_point >> 6) & 0x3F),
          0x80 | ($code_point & 0x3F),
      );
    } else {
      // 4 bytes
      $byteValues = array(
          0xF0 | ($code_point >> 18),
          0x80 | (($code_point >> 12) & 0x3F),
          0x80 | (($code_point >> 6) & 0x3F),
          0x80 | ($code_point & 0x3F),
      );
    }

    $str = '';
    foreach ($byteValues as $byteValue) {
      $str .= self::chr_and_parse_int($byteValue);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $generatedChars[$lookupKey] = $str;

    return $str;
  }
1001 2
1002 2
  /**
   * Casts the given value to an integer and returns the single byte for it
   * (thin wrapper around the native "chr()").
   *
   * @param int $int <p>The byte value.</p>
   *
   * @return string <p>A one-byte string.</p>
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1011 2
1012 2
  /**
   * Splits a string via UTF8::split() and runs the callback on every resulting element.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1026 2
1027
  /**
   * Returns the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character (empty array for an empty string).</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $sizes = array();
    foreach (self::split($str) as $index => $char) {
      // '8BIT' => measure the raw bytes of the element
      $sizes[$index] = UTF8::strlen($char, '8BIT');
    }

    return $sizes;
  }
1054
1055 1
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 to 4); the payload bits of the
   * following bytes are appended 6 bits at a time. Continuation bytes are not validated.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded code point, or 0 for an empty input.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // guard against empty input, otherwise "$char[0]" below reads an uninitialized string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx => strip the marker bit and append the payload
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
1094 17
1095
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The literal string '&#0;' is handled like an empty character before it is
   * handed to UTF8::ord().
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional]
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    $codePoint = self::ord($char === '&#0;' ? '' : $char);

    return self::int_to_hex($codePoint, $pfix);
  }
1117
1118 4
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1131
1132 4
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * INFO: the chunks are glued together with $end, so (unlike the native "chunk_split()")
   *       nothing is appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1145
1146 5
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order: drop invalid bytes via regex, remove the replacement character,
   * remove invisible characters, then the optional whitespace / MS Word / BOM handling.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // the quantifier is capped ({1,100}) because an unbounded one
    // caused connection reset problem on larger strings

    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // only the first group (valid sequences) is kept, stray bytes are dropped
    $str = preg_replace($pattern, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1194 1
1195
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors first and then runs UTF8::clean() on it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // UTF8::clean() is called with:
    // - remove BOM                => true
    // - normalize whitespace      => true
    // - normalize MS Word chars   => false
    // - keep non-breaking-spaces  => true
    // (it also removes non UTF-8 symbols, the diamond question mark (�) and invisible characters, e.g. "\0")
    return (string)self::clean($fixed, true, true, false, true);
  }
1222 5
1223
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split into its characters first
    $chars = is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    // convert the integers into the "U+xxxx" notation
    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $codePoints);
  }
1260
1261
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split into chunks of length 1
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1274
1275
  /**
   * Converts an int-value into a UTF-8 character by decoding the numeric HTML entity "&#<int>;".
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only referenced when the PHP version check succeeds
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1292
1293
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 string without messing it up.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string; the input string is returned when no encoding could be
   *                detected, when nothing has to be done or when the conversion produced no result.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // no usable detection result (explicit check instead of a loose truthiness test)
    if (is_string($encodingDetected) === false || $encodingDetected === '') {
      return $str;
    }

    // without "$force" there is nothing to do if the string already has the target encoding
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // strict check: a converted result like "0" is valid and must not be thrown away,
    // only a failed (false) or empty conversion falls back to the input string
    if ($strEncoded !== false && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1383
1384
  /**
1385
   * Reads entire file into a string.
1386 2
   *
1387 1
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1388
   *
1389
   * @link http://php.net/manual/en/function.file-get-contents.php
1390 1
   *
1391 1
   * @param string        $filename      <p>
1392 1
   *                                     Name of the file to read.
1393 1
   *                                     </p>
1394
   * @param int|false     $flags         [optional] <p>
1395 1
   *                                     Prior to PHP 6, this parameter is called
1396
   *                                     use_include_path and is a bool.
1397
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1398
   *                                     to trigger include path
1399
   *                                     search.
1400
   *                                     </p>
1401
   *                                     <p>
1402
   *                                     The value of flags can be any combination of
1403
   *                                     the following flags (with some restrictions), joined with the
1404
   *                                     binary OR (|)
1405 1
   *                                     operator.
1406
   *                                     </p>
1407 1
   *                                     <p>
1408
   *                                     <table>
1409
   *                                     Available flags
1410
   *                                     <tr valign="top">
1411
   *                                     <td>Flag</td>
1412
   *                                     <td>Description</td>
1413
   *                                     </tr>
1414
   *                                     <tr valign="top">
1415
   *                                     <td>
1416
   *                                     FILE_USE_INCLUDE_PATH
1417
   *                                     </td>
1418
   *                                     <td>
1419 9
   *                                     Search for filename in the include directory.
1420
   *                                     See include_path for more
1421 9
   *                                     information.
1422 9
   *                                     </td>
1423 3
   *                                     </tr>
1424
   *                                     <tr valign="top">
1425 3
   *                                     <td>
1426 3
   *                                     FILE_TEXT
1427 3
   *                                     </td>
1428 9
   *                                     <td>
1429 2
   *                                     As of PHP 6, the default encoding of the read
1430 2
   *                                     data is UTF-8. You can specify a different encoding by creating a
1431 2
   *                                     custom context or by changing the default using
1432 2
   *                                     stream_default_encoding. This flag cannot be
1433 9
   *                                     used with FILE_BINARY.
1434
   *                                     </td>
1435 8
   *                                     </tr>
1436
   *                                     <tr valign="top">
1437 2
   *                                     <td>
1438 2
   *                                     FILE_BINARY
1439
   *                                     </td>
1440 8
   *                                     <td>
1441
   *                                     With this flag, the file is read in binary mode. This is the default
1442 8
   *                                     setting and cannot be used with FILE_TEXT.
1443 6
   *                                     </td>
1444 6
   *                                     </tr>
1445 6
   *                                     </table>
1446
   *                                     </p>
1447 6
   * @param resource|null $context       [optional] <p>
1448 3
   *                                     A valid context resource created with
1449 3
   *                                     stream_context_create. If you don't need to use a
1450 5
   *                                     custom context, you can skip this parameter by passing null.
1451
   *                                     </p>
1452
   * @param int|null      $offset        [optional] <p>
1453
   *                                     The offset where the reading starts.
1454
   *                                     </p>
1455 8
   * @param int|null      $maxlen        [optional] <p>
1456 8
   *                                     Maximum length of data read. The default is to read until end
1457 5
   *                                     of file is reached.
1458 8
   *                                     </p>
1459
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1460
   *
1461 2
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1462 2
   *                                     or pdf, because they used non default utf-8 chars</p>
1463 8
   *
1464 8
   * @return string|false <p>The function returns the read data or false on failure.</p>
1465 9
   */
1466
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // normalize the input
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so a filename
    //               containing such characters is altered before it is read - confirm this is intended
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // build a default stream-context which carries the (http) timeout, if no context was given
    if ($context === null && $timeout) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    // every "falsy" flag-value is passed on as boolean false
    $flags = $flags ? $flags : false;

    // start at the beginning of the file by default
    $offset = ($offset === null) ? 0 : $offset;

    // the length is only passed to the native function if it is a real integer
    $data = (is_int($maxlen) === true)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // convert into UTF-8 (with auto-detection of the current encoding) and clean the result
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
1509
1510
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also if the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // the native function returns false on failure: do not pass that on as if it were file content
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1521
1522 1
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to "\Normalizer", default is 4 (NFC).</p>
   * @param string $leading_combining  <p>Prefix that is put in front of a string which starts
   *                                   with a combining mark (\p{Mn}).</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: the string is usable as it is
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // fall back to a re-encoding if the normalization delivered nothing usable
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1585 2
1586
  /**
1587
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1588 7
   *
1589 1
   * Gets a specific external variable by name and optionally filters it
1590 1
   *
1591 1
   * @link  http://php.net/manual/en/function.filter-input.php
1592
   *
1593 7
   * @param int    $type          <p>
1594
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1595
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1596
   *                              <b>INPUT_ENV</b>.
1597
   *                              </p>
1598
   * @param string $variable_name <p>
1599
   *                              Name of a variable to get.
1600
   *                              </p>
1601
   * @param int    $filter        [optional] <p>
1602
   *                              The ID of the filter to apply. The
1603 1
   *                              manual page lists the available filters.
1604
   *                              </p>
1605 1
   * @param mixed  $options       [optional] <p>
1606
   *                              Associative array of options or bitwise disjunction of flags. If filter
1607 1
   *                              accepts options, flags can be provided in "flags" field of array.
1608
   *                              </p>
1609
   *
1610 1
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1611 1
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1612
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1613 1
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1614
   * @since 5.2.0
1615
   */
1616 1 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded to the native function if the caller really passed it
    $var = (func_num_args() >= 4)
        ? filter_input($type, $variable_name, $filter, $options)
        : filter_input($type, $variable_name, $filter);

    // normalize the result via UTF8::filter()
    return self::filter($var);
  }
1626
1627
  /**
1628
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1629
   *
1630
   * Gets external variables and optionally filters them
1631
   *
1632 1
   * @link  http://php.net/manual/en/function.filter-input-array.php
1633
   *
1634 1
   * @param int   $type       <p>
1635
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1636
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1637
   *                          <b>INPUT_ENV</b>.
1638 1
   *                          </p>
1639
   * @param mixed $definition [optional] <p>
1640
   *                          An array defining the arguments. A valid key is a string
1641
   *                          containing a variable name and a valid value is either a filter type, or an array
1642
   *                          optionally specifying the filter, flags and options. If the value is an
1643
   *                          array, valid keys are filter which specifies the
1644
   *                          filter type,
1645
   *                          flags which specifies any flags that apply to the
1646
   *                          filter, and options which specifies any options that
1647
   *                          apply to the filter. See the example below for a better understanding.
1648
   *                          </p>
1649
   *                          <p>
1650
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1651
   *                          input array are filtered by this filter.
1652
   *                          </p>
1653
   * @param bool  $add_empty  [optional] <p>
1654 1
   *                          Add missing keys as <b>NULL</b> to the return value.
1655
   *                          </p>
1656 1
   *
1657 1
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1658
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1659
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1660 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1661
   * fails.
1662 1
   * @since 5.2.0
1663 1
   */
1664 1 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // the optional arguments are only forwarded if the caller passed more than "$type"
    $values = (func_num_args() >= 2)
        ? filter_input_array($type, $definition, $add_empty)
        : filter_input_array($type);

    // normalize the result via UTF8::filter()
    return self::filter($values);
  }
1674
1675
  /**
1676
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1677
   *
1678
   * Filters a variable with a specified filter
1679
   *
1680
   * @link  http://php.net/manual/en/function.filter-var.php
1681
   *
1682
   * @param mixed $variable <p>
1683
   *                        Value to filter.
1684
   *                        </p>
1685
   * @param int   $filter   [optional] <p>
1686
   *                        The ID of the filter to apply. The
1687
   *                        manual page lists the available filters.
1688
   *                        </p>
1689
   * @param mixed $options  [optional] <p>
1690
   *                        Associative array of options or bitwise disjunction of flags. If filter
1691
   *                        accepts options, flags can be provided in "flags" field of array. For
1692 1
   *                        the "callback" filter, callable type should be passed. The
1693 1
   *                        callback must accept one argument, the value to be filtered, and return
1694
   *                        the value after filtering/sanitizing it.
1695
   *                        </p>
1696
   *                        <p>
1697
   *                        <code>
1698
   *                        // for filters that accept options, use this format
1699
   *                        $options = array(
1700
   *                        'options' => array(
1701
   *                        'default' => 3, // value to return if the filter fails
1702
   *                        // other options here
1703
   *                        'min_range' => 0
1704
   *                        ),
1705
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1706
   *                        );
1707
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1708
   *                        // for filter that only accept flags, you can pass them directly
1709
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1710
   *                        // for filter that only accept flags, you can also pass as an array
1711
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1712
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1713
   *                        // callback validate filter
1714
   *                        function foo($value)
1715
   *                        {
1716
   *                        // Expected format: Surname, GivenNames
1717
   *                        if (strpos($value, ", ") === false) return false;
1718
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1719
   *                        $empty = (empty($surname) || empty($givennames));
1720
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1721
   *                        if ($empty || $notstrings) {
1722
   *                        return false;
1723
   *                        } else {
1724
   *                        return $value;
1725
   *                        }
1726
   *                        }
1727
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1728
   *                        </code>
1729
   *                        </p>
1730
   *
1731
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1732
   * @since 5.2.0
1733
   */
1734 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Pass $options on to the native function only when the caller
    // actually supplied a third argument.
    $filtered = func_num_args() < 3
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // Post-process the native result through the class' own filter().
    return self::filter($filtered);
  }
1744
1745
  /**
1746
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1747
   *
1748
   * Gets multiple variables and optionally filters them
1749
   *
1750
   * @link  http://php.net/manual/en/function.filter-var-array.php
1751
   *
1752 1
   * @param array $data       <p>
1753
   *                          An array with string keys containing the data to filter.
1754 1
   *                          </p>
1755 1
   * @param mixed $definition [optional] <p>
1756
   *                          An array defining the arguments. A valid key is a string
1757 1
   *                          containing a variable name and a valid value is either a
1758
   *                          filter type, or an
1759
   *                          array optionally specifying the filter, flags and options.
1760
   *                          If the value is an array, valid keys are filter
1761
   *                          which specifies the filter type,
1762
   *                          flags which specifies any flags that apply to the
1763
   *                          filter, and options which specifies any options that
1764
   *                          apply to the filter. See the example below for a better understanding.
1765
   *                          </p>
1766
   *                          <p>
1767
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1768
   *                          input array are filtered by this filter.
1769
   *                          </p>
1770
   * @param bool  $add_empty  [optional] <p>
1771
   *                          Add missing keys as <b>NULL</b> to the return value.
1772 1
   *                          </p>
1773
   *
1774 1
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1775
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1776
   * the variable is not set.
1777
   * @since 5.2.0
1778
   */
1779 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without
    // $definition / $add_empty, so it applies its own defaults.
    $result = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // Post-process the native result through the class' own filter().
    return self::filter($result);
  }
1789 1
1790
  /**
1791
   * Check if the number of Unicode characters is not more than the specified integer.
1792 1
   *
1793 1
   * @param string $str      The original string to be checked.
1794
   * @param int    $box_size The size in number of chars to be checked against string.
1795
   *
1796 1
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1797
   */
1798
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by the class' own strlen() implementation.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1802
1803
  /**
1804
   * Try to fix simple broken UTF-8 strings.
1805
   *
1806
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1807
   *
1808
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
1809
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1810 1
   * See: http://en.wikipedia.org/wiki/Windows-1252
1811
   *
1812 1
   * @param string $str <p>The input string</p>
1813
   *
1814
   * @return string
1815
   */
1816 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // The replacement map is split into parallel search / replace lists
    // on the first call and reused on every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1835
1836 2
  /**
1837 1
   * Fix a double (or multiple) encoded UTF8 string.
1838 1
   *
1839
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1840 2
   *
1841 2
   * @return mixed
1842 2
   */
1843
  public static function fix_utf8($str)
  {
    // Arrays are repaired element by element (recursively), keys untouched.
    if (is_array($str) === true) {

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // An empty string needs no work at all.
    if ($str === '') {
      return $str;
    }

    // Decode and re-encode until a pass no longer changes the value.
    do {
      $previous = $str;
      $str = self::to_utf8(
          self::utf8_decode($str)
      );
    } while ($previous !== $str);

    return $str;
  }
1867
1868
  /**
1869
   * Get the text direction of a specific character.
1870
   *
1871
   * @param string $char
1872
   *
1873
   * @return string <p>'RTL' or 'LTR'</p>
1874
   */
1875
  public static function getCharDirection($char)
  {
    // Single code points that are treated as right-to-left (matched strictly).
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // Inclusive [first, last] code point ranges that are treated as right-to-left.
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer the intl extension when it is flagged as available.
    if (self::$SUPPORT['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // Numeric direction codes taken from the "IntlChar" class.
      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // Any other direction code falls through to the table lookup below.
    }

    $c = self::chr_to_decimal($char);

    // Everything outside the span covered by the tables is left-to-right.
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    if (in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($c >= $range[0] && $c <= $range[1]) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1989
1990
  /**
1991
   * Get data from "/data/*.php".
1992
   *
1993
   * @param string $file
1994
   *
1995
   * @return bool|string|array|int <p>Will return false on error.</p>
1996
   */
1997
  private static function getData($file)
  {
    // Data files live next to this class, in "data/<name>.php".
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    // The result is whatever the required file returns.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2007
2008
  /**
2009
   * Check for php-support.
2010
   *
2011
   * @param string|null $key
2012
   *
2013
   * @return bool[]|bool|null return the full support-array, if $key === null<br />
2014
   *                          return bool-value, if $key is used and available<br />
2015
   *                          otherwise return null
2016
   */
2017
  public static function getSupportInfo($key = null)
  {
    // Trigger the support check if it has not been recorded as done yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key requested: hand back the complete support array.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) keys yield null.
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2033
2034
  /**
2035
   * alias for "UTF8::string_has_bom()"
2036
   *
2037
   * @see UTF8::string_has_bom()
2038
   *
2039
   * @param string $str
2040
   *
2041
   * @return bool
2042
   *
2043
   * @deprecated
2044
   */
2045
  public static function hasBom($str)
  {
    // Deprecated alias: simply delegates to string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2049
2050
  /**
2051
   * Converts a hexadecimal-value into an UTF-8 character.
2052
   *
2053
   * @param string $hexdec <p>The hexadecimal value.</p>
2054
   *
2055
   * @return string|false <p>One single UTF-8 character.</p>
2056
   */
2057
  public static function hex_to_chr($hexdec)
  {
    // Hexadecimal string -> decimal number -> character.
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2061
2062
  /**
2063
   * Converts hexadecimal U+xxxx code point representation to integer.
2064
   *
2065
   * INFO: opposite to UTF8::int_to_hex()
2066
   *
2067
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
2068
   *
2069
   * @return int|false <p>The code point, or false on failure.</p>
2070
   */
2071
  public static function hex_to_int($hexdec)
  {
    $hexdec = (string)$hexdec;

    if ($hexdec === '') {
      return false;
    }

    // Accept an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal
    // digits (case-insensitive). Only real hex digits are allowed: the former
    // [a-z0-9] class let inputs such as "zzzz" through, which intval() then
    // silently turned into 0 instead of reporting a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2085
2086
  /**
2087
   * alias for "UTF8::html_entity_decode()"
2088
   *
2089
   * @see UTF8::html_entity_decode()
2090
   *
2091
   * @param string $str
2092
   * @param int    $flags
2093
   * @param string $encoding
2094 2
   *
2095
   * @return string
2096 2
   */
2097 1
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Alias: all arguments are forwarded unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2101
2102 2
  /**
2103 1
   * Converts a UTF-8 string to a series of HTML numbered entities.
2104
   *
2105
   * INFO: opposite to UTF8::html_decode()
2106 2
   *
2107 2
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2108 2
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2109 2
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2110 2
   *
2111 1
   * @return string <p>HTML numbered entities.</p>
2112
   */
2113 1
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // First code point to convert: starting at 0x80 leaves ASCII untouched.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap is made of groups of exactly four integers
      // (start code, end code, offset, mask). The map used to carry a stray
      // fifth element, which PHP 8 rejects with a ValueError.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback when the mbstring function is missing: encode char by char.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2151
2152
  /**
2153
   * UTF-8 version of html_entity_decode()
2154
   *
2155
   * The reason we are not using html_entity_decode() by itself is because
2156
   * while it is not technically correct to leave out the semicolon
2157
   * at the end of an entity most browsers will still interpret the entity
2158
   * correctly. html_entity_decode() does not convert entities without
2159
   * semicolons, so we are left with our own little solution here. Bummer.
2160
   *
2161
   * Convert all HTML entities to their applicable characters
2162
   *
2163
   * INFO: opposite to UTF8::html_encode()
2164
   *
2165
   * @link http://php.net/manual/en/function.html-entity-decode.php
2166
   *
2167
   * @param string $str      <p>
2168
   *                         The input string.
2169
   *                         </p>
2170
   * @param int    $flags    [optional] <p>
2171
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2172
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2173
   *                         <table>
2174
   *                         Available <i>flags</i> constants
2175
   *                         <tr valign="top">
2176
   *                         <td>Constant Name</td>
2177
   *                         <td>Description</td>
2178
   *                         </tr>
2179
   *                         <tr valign="top">
2180
   *                         <td><b>ENT_COMPAT</b></td>
2181
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2182
   *                         </tr>
2183
   *                         <tr valign="top">
2184
   *                         <td><b>ENT_QUOTES</b></td>
2185
   *                         <td>Will convert both double and single quotes.</td>
2186
   *                         </tr>
2187
   *                         <tr valign="top">
2188
   *                         <td><b>ENT_NOQUOTES</b></td>
2189
   *                         <td>Will leave both double and single quotes unconverted.</td>
2190
   *                         </tr>
2191
   *                         <tr valign="top">
2192
   *                         <td><b>ENT_HTML401</b></td>
2193
   *                         <td>
2194
   *                         Handle code as HTML 4.01.
2195
   *                         </td>
2196
   *                         </tr>
2197
   *                         <tr valign="top">
2198
   *                         <td><b>ENT_XML1</b></td>
2199
   *                         <td>
2200
   *                         Handle code as XML 1.
2201
   *                         </td>
2202
   *                         </tr>
2203
   *                         <tr valign="top">
2204
   *                         <td><b>ENT_XHTML</b></td>
2205
   *                         <td>
2206
   *                         Handle code as XHTML.
2207
   *                         </td>
2208
   *                         </tr>
2209
   *                         <tr valign="top">
2210
   *                         <td><b>ENT_HTML5</b></td>
2211
   *                         <td>
2212
   *                         Handle code as HTML 5.
2213
   *                         </td>
2214
   *                         </tr>
2215
   *                         </table>
2216
   *                         </p>
2217
   * @param string $encoding [optional] <p>Encoding to use.</p>
2218
   *
2219
   * @return string <p>The decoded string.</p>
2220
   */
2221
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Strings shorter than four bytes are returned untouched (examples: "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Without an ampersand there is nothing to decode ...
    if (strpos($str, '&') === false) {
      return $str;
    }

    // ... and the same holds when neither "&#" nor ";" occurs.
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Default flags: ENT_HTML5 is only added when Bootup reports PHP >= 5.4.
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true)
          ? (ENT_QUOTES | ENT_HTML5)
          : ENT_QUOTES;
    }

    // Repeat until a complete pass leaves the string unchanged.
    do {
      $before = $str;

      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            // Quote characters are not substituted here; the entity is
            // passed on unchanged to the html_entity_decode() call below.
            if ($decoded === '"' || $decoded === "'") {
              return $matches[0];
            }

            return $decoded;
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (numeric entities lacking a trailing semicolon get one appended first)
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2286
2287
  /**
2288
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2289 2
   *
2290
   * @link http://php.net/manual/en/function.htmlentities.php
2291 2
   *
2292
   * @param string $str           <p>
2293
   *                              The input string.
2294
   *                              </p>
2295
   * @param int    $flags         [optional] <p>
2296
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2297
   *                              invalid code unit sequences and the used document type. The default is
2298
   *                              ENT_COMPAT | ENT_HTML401.
2299
   *                              <table>
2300
   *                              Available <i>flags</i> constants
2301
   *                              <tr valign="top">
2302
   *                              <td>Constant Name</td>
2303 2
   *                              <td>Description</td>
2304
   *                              </tr>
2305 2
   *                              <tr valign="top">
2306
   *                              <td><b>ENT_COMPAT</b></td>
2307
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2308
   *                              </tr>
2309
   *                              <tr valign="top">
2310
   *                              <td><b>ENT_QUOTES</b></td>
2311
   *                              <td>Will convert both double and single quotes.</td>
2312
   *                              </tr>
2313
   *                              <tr valign="top">
2314
   *                              <td><b>ENT_NOQUOTES</b></td>
2315
   *                              <td>Will leave both double and single quotes unconverted.</td>
2316
   *                              </tr>
2317 1
   *                              <tr valign="top">
2318
   *                              <td><b>ENT_IGNORE</b></td>
2319 1
   *                              <td>
2320
   *                              Silently discard invalid code unit sequences instead of returning
2321
   *                              an empty string. Using this flag is discouraged as it
2322
   *                              may have security implications.
2323
   *                              </td>
2324
   *                              </tr>
2325
   *                              <tr valign="top">
2326
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2327
   *                              <td>
2328
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2329
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2330
   *                              </td>
2331
   *                              </tr>
2332
   *                              <tr valign="top">
2333
   *                              <td><b>ENT_DISALLOWED</b></td>
2334
   *                              <td>
2335
   *                              Replace invalid code points for the given document type with a
2336
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2337
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2338
   *                              instance, to ensure the well-formedness of XML documents with
2339
   *                              embedded external content.
2340
   *                              </td>
2341
   *                              </tr>
2342
   *                              <tr valign="top">
2343
   *                              <td><b>ENT_HTML401</b></td>
2344
   *                              <td>
2345
   *                              Handle code as HTML 4.01.
2346
   *                              </td>
2347
   *                              </tr>
2348
   *                              <tr valign="top">
2349
   *                              <td><b>ENT_XML1</b></td>
2350
   *                              <td>
2351
   *                              Handle code as XML 1.
2352
   *                              </td>
2353
   *                              </tr>
2354
   *                              <tr valign="top">
2355
   *                              <td><b>ENT_XHTML</b></td>
2356
   *                              <td>
2357
   *                              Handle code as XHTML.
2358
   *                              </td>
2359 1
   *                              </tr>
2360
   *                              <tr valign="top">
2361 1
   *                              <td><b>ENT_HTML5</b></td>
2362
   *                              <td>
2363
   *                              Handle code as HTML 5.
2364
   *                              </td>
2365
   *                              </tr>
2366
   *                              </table>
2367
   *                              </p>
2368
   * @param string $encoding      [optional] <p>
2369
   *                              Like <b>htmlspecialchars</b>,
2370
   *                              <b>htmlentities</b> takes an optional third argument
2371
   *                              <i>encoding</i> which defines encoding used in
2372
   *                              conversion.
2373
   *                              Although this argument is technically optional, you are highly
2374
   *                              encouraged to specify the correct value for your code.
2375
   *                              </p>
2376
   * @param bool   $double_encode [optional] <p>
2377
   *                              When <i>double_encode</i> is turned off PHP will not
2378
   *                              encode existing html entities. The default is to convert everything.
2379
   *                              </p>
2380
   *
2381
   *
2382
   * @return string the encoded string.
2383
   * </p>
2384
   * <p>
2385
   * If the input <i>string</i> contains an invalid code unit
2386
   * sequence within the given <i>encoding</i> an empty string
2387 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2388
   * <b>ENT_SUBSTITUTE</b> flags are set.
2389 1
   */
2390
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes together
    // with its numeric-entity form.
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);

      // Each character is encoded only once.
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $str);
  }
2418 16
2419
  /**
2420
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2421
   *
2422
   * INFO: Take a look at "UTF8::htmlentities()"
2423
   *
2424
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2425
   *
2426
   * @param string $str           <p>
2427
   *                              The string being converted.
2428
   *                              </p>
2429
   * @param int    $flags         [optional] <p>
2430
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2431 28
   *                              invalid code unit sequences and the used document type. The default is
2432
   *                              ENT_COMPAT | ENT_HTML401.
2433 28
   *                              <table>
2434
   *                              Available <i>flags</i> constants
2435 28
   *                              <tr valign="top">
2436 5
   *                              <td>Constant Name</td>
2437
   *                              <td>Description</td>
2438
   *                              </tr>
2439 28
   *                              <tr valign="top">
2440
   *                              <td><b>ENT_COMPAT</b></td>
2441
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2442
   *                              </tr>
2443
   *                              <tr valign="top">
2444
   *                              <td><b>ENT_QUOTES</b></td>
2445
   *                              <td>Will convert both double and single quotes.</td>
2446
   *                              </tr>
2447
   *                              <tr valign="top">
2448
   *                              <td><b>ENT_NOQUOTES</b></td>
2449 1
   *                              <td>Will leave both double and single quotes unconverted.</td>
2450
   *                              </tr>
2451 1
   *                              <tr valign="top">
2452
   *                              <td><b>ENT_IGNORE</b></td>
2453 1
   *                              <td>
2454 1
   *                              Silently discard invalid code unit sequences instead of returning
2455
   *                              an empty string. Using this flag is discouraged as it
2456
   *                              may have security implications.
2457 1
   *                              </td>
2458 1
   *                              </tr>
2459
   *                              <tr valign="top">
2460 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2461
   *                              <td>
2462
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2463
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2464
   *                              </td>
2465
   *                              </tr>
2466
   *                              <tr valign="top">
2467
   *                              <td><b>ENT_DISALLOWED</b></td>
2468
   *                              <td>
2469
   *                              Replace invalid code points for the given document type with a
2470
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2471 16
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2472
   *                              instance, to ensure the well-formedness of XML documents with
2473
   *                              embedded external content.
2474 16
   *                              </td>
2475
   *                              </tr>
2476
   *                              <tr valign="top">
2477 16
   *                              <td><b>ENT_HTML401</b></td>
2478
   *                              <td>
2479 16
   *                              Handle code as HTML 4.01.
2480 16
   *                              </td>
2481 15
   *                              </tr>
2482 16
   *                              <tr valign="top">
2483 6
   *                              <td><b>ENT_XML1</b></td>
2484
   *                              <td>
2485 15
   *                              Handle code as XML 1.
2486
   *                              </td>
2487
   *                              </tr>
2488
   *                              <tr valign="top">
2489
   *                              <td><b>ENT_XHTML</b></td>
2490
   *                              <td>
2491
   *                              Handle code as XHTML.
2492
   *                              </td>
2493
   *                              </tr>
2494
   *                              <tr valign="top">
2495
   *                              <td><b>ENT_HTML5</b></td>
2496
   *                              <td>
2497
   *                              Handle code as HTML 5.
2498
   *                              </td>
2499
   *                              </tr>
2500
   *                              </table>
2501
   *                              </p>
2502
   * @param string $encoding      [optional] <p>
2503
   *                              Defines encoding used in conversion.
2504
   *                              </p>
2505
   *                              <p>
2506
   *                              For the purposes of this function, the encodings
2507
   *                              ISO-8859-1, ISO-8859-15,
2508
   *                              UTF-8, cp866,
2509
   *                              cp1251, cp1252, and
2510
   *                              KOI8-R are effectively equivalent, provided the
2511
   *                              <i>string</i> itself is valid for the encoding, as
2512
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2513
   *                              the same positions in all of these encodings.
2514
   *                              </p>
2515
   * @param bool   $double_encode [optional] <p>
2516
   *                              When <i>double_encode</i> is turned off PHP will not
2517
   *                              encode existing html entities, the default is to convert everything.
2518
   *                              </p>
2519
   *
2520
   * @return string The converted string.
2521
   * </p>
2522
   * <p>
2523
   * If the input <i>string</i> contains an invalid code unit
2524
   * sequence within the given <i>encoding</i> an empty string
2525
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2526
   * <b>ENT_SUBSTITUTE</b> flags are set.
2527
   */
2528
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Anything other than the literal "UTF-8" is normalized first (falling back to UTF-8).
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2536 1
2537
  /**
2538 1
   * Checks whether iconv is available on the server.
2539
   *
2540 1
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2541
   */
2542
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a strict boolean.
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6, so it is only
    // used on older versions. It is also only called when the function actually
    // exists; otherwise a missing iconv extension would trigger a fatal
    // "undefined function" error instead of this method returning false.
    if (
        Bootup::is_php('5.6') === false
        &&
        function_exists('iconv_set_encoding') === true
    ) {
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2557
2558
  /**
2559
   * alias for "UTF8::decimal_to_chr()"
2560
   *
2561
   * @see UTF8::decimal_to_chr()
2562
   *
2563 1
   * @param mixed $int
2564
   *
2565 1
   * @return string
2566
   */
2567 1
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2571
2572 1
  /**
2573 1
   * Converts Integer to hexadecimal U+xxxx code point representation.
2574 1
   *
2575 1
   * INFO: opposite to UTF8::hex_to_int()
2576 1
   *
2577
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2578 1
   * @param string $pfix [optional]
2579
   *
2580
   * @return string <p>The code point, or empty string on failure.</p>
2581
   */
2582
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only genuine integers are converted; everything else yields an empty string.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits (e.g. 65 => "0041").
    $digits = str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2594
2595 4
  /**
2596
   * Checks whether intl-char is available on the server.
2597 4
   *
2598
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2599 4
   */
2600 4
  public static function intlChar_loaded()
  {
    // The version check comes first, so class_exists() is only consulted on PHP >= 7.0.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2608 2
2609 2
  /**
2610 4
   * Checks whether intl is available on the server.
2611 4
   *
2612 4
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2613
   */
2614 4
  public static function intl_loaded()
  {
    // extension_loaded() already yields a strict boolean.
    return extension_loaded('intl');
  }
2618 4
2619 4
  /**
2620 4
   * alias for "UTF8::is_ascii()"
2621 4
   *
2622 4
   * @see UTF8::is_ascii()
2623 3
   *
2624 3
   * @param string $str
2625 4
   *
2626 4
   * @return boolean
2627 4
   *
2628
   * @deprecated
2629 4
   */
2630 3
  public static function isAscii($str)
  {
    // Deprecated camelCase alias; the check itself lives in is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2634
2635
  /**
2636
   * alias for "UTF8::is_base64()"
2637 3
   *
2638
   * @see UTF8::is_base64()
2639 3
   *
2640
   * @param string $str
2641
   *
2642
   * @return bool
2643
   *
2644
   * @deprecated
2645
   */
2646
  public static function isBase64($str)
  {
    // Deprecated camelCase alias; the check itself lives in is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2650
2651
  /**
2652
   * alias for "UTF8::is_binary()"
2653 3
   *
2654
   * @see UTF8::is_binary()
2655 3
   *
2656
   * @param string $str
2657 3
   *
2658
   * @return bool
2659 3
   *
2660 3
   * @deprecated
2661 3
   */
2662 3
  public static function isBinary($str)
  {
    // Deprecated camelCase alias; the check itself lives in is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2666 3
2667 3
  /**
2668 1
   * alias for "UTF8::is_bom()"
2669 1
   *
2670 3
   * @see UTF8::is_bom()
2671 3
   *
2672 3
   * @param string $utf8_chr
2673
   *
2674 3
   * @return boolean
2675 3
   *
2676 3
   * @deprecated
2677 3
   */
2678 3
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias; the check itself lives in is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2682 3
2683 1
  /**
2684 1
   * alias for "UTF8::is_html()"
2685 3
   *
2686 3
   * @see UTF8::is_html()
2687 3
   *
2688
   * @param string $str
2689 3
   *
2690 1
   * @return boolean
2691 1
   *
2692
   * @deprecated
2693 1
   */
2694
  public static function isHtml($str)
  {
    // Deprecated camelCase alias; the check itself lives in is_html().
    $result = self::is_html($str);

    return $result;
  }
2698
2699 3
  /**
2700
   * alias for "UTF8::is_json()"
2701
   *
2702
   * @see UTF8::is_json()
2703
   *
2704
   * @param string $str
2705
   *
2706
   * @return bool
2707
   *
2708
   * @deprecated
2709
   */
2710
  public static function isJson($str)
  {
    // Deprecated camelCase alias; the check itself lives in is_json().
    $result = self::is_json($str);

    return $result;
  }
2714 43
2715
  /**
2716 43
   * alias for "UTF8::is_utf16()"
2717 3
   *
2718
   * @see UTF8::is_utf16()
2719
   *
2720 41
   * @param string $str
2721 1
   *
2722 1
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2723
   *
2724
   * @deprecated
2725
   */
2726
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias; the detection itself lives in is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2730 41
2731
  /**
2732
   * alias for "UTF8::is_utf32()"
2733
   *
2734
   * @see UTF8::is_utf32()
2735
   *
2736
   * @param string $str
2737
   *
2738
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2739
   *
2740 41
   * @deprecated
2741
   */
2742 41
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias; the detection itself lives in is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2746
2747 41
  /**
2748 41
   * alias for "UTF8::is_utf8()"
2749 41
   *
2750
   * @see UTF8::is_utf8()
2751
   *
2752 41
   * @param string $str
2753
   * @param bool   $strict
2754 36
   *
2755 41
   * @return bool
2756
   *
2757 34
   * @deprecated
2758 34
   */
2759 34
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias; both arguments are forwarded unchanged to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2763 21
2764 21
  /**
2765 21
   * Checks if a string is 7 bit ASCII.
2766 21
   *
2767 33
   * @param string $str <p>The string to check.</p>
2768
   *
2769 9
   * @return bool <p>
2770 9
   *              <strong>true</strong> if it is ASCII<br />
2771 9
   *              <strong>false</strong> otherwise
2772 9
   *              </p>
2773 16
   */
2774
  public static function is_ascii($str)
  {
    $text = (string)$str;

    // An empty string contains no non-ASCII bytes.
    if ($text === '') {
      return true;
    }

    // Any byte with the high bit set (0x80-0xFF) is outside 7-bit ASCII.
    return preg_match('/[\x80-\xFF]/', $text) !== 1;
  }
2784 3
2785 3
  /**
2786 9
   * Returns true if the string is base64 encoded, false otherwise.
2787
   *
2788 3
   * @param string $str <p>The input string.</p>
2789 3
   *
2790 3
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2791 3
   */
2792 3
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Strict mode makes base64_decode() return false for characters outside the
    // base64 alphabet.
    $decoded = base64_decode($str, true);

    // Compare explicitly against false / '' instead of relying on truthiness:
    // a decoded payload of "0" (input "MA==") is falsy in PHP and was previously
    // rejected even though it is valid base64.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Re-encoding must reproduce the input exactly (canonical form incl. padding).
    return base64_encode($decoded) === $str;
  }
2807
2808
  /**
2809
   * Check if the input is binary... (this looks like a hack).
2810
   *
2811 33
   * @param mixed $input
2812
   *
2813
   * @return bool
2814
   */
2815
  public static function is_binary($input)
  {
    $input = (string)$input;

    if ($input === '') {
      return false;
    }

    // A string consisting solely of "0" and "1" is treated as a textual bit-string.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30% NUL
    // bytes" test was redundant: "\x0" and "\x00" are the same byte, so every
    // input passing that test also passed this one.
    return strpos($input, "\x00") !== false;
  }
2838 18
2839
  /**
2840
   * Check if the file is binary.
2841 41
   *
2842
   * @param string $file
2843 20
   *
2844
   * @return boolean
2845
   */
2846
  public static function is_binary_file($file)
  {
    $fp = false;
    $block = '';

    try {
      // fopen() reports failure by returning false (plus a warning), not by
      // throwing, so the handle must be checked before it is read from.
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        // Only the first 512 bytes are sampled.
        $block = fread($fp, 512);
      }
    } catch (\Exception $e) {
      // Reached when the calls above throw, e.g. if an error handler turns warnings into exceptions.
      $block = '';
    }

    // Close the handle on every path, including after an exception during fread().
    if (is_resource($fp)) {
      fclose($fp);
    }

    // A failed read (false) is cast to '' inside is_binary() and reported as non-binary.
    return self::is_binary($block);
  }
2858
2859
  /**
2860
   * Checks if the given string is equal to any "Byte Order Mark".
2861
   *
2862
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2863
   *
2864
   * @param string $str <p>The input string.</p>
2865
   *
2866
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2867
   */
2868
  public static function is_bom($str)
  {
    // The BOM table is keyed by the BOM byte sequence; a strict (type-sensitive)
    // search over those keys matches the original element-by-element comparison.
    $knownBoms = array_keys(self::$BOM);

    return in_array($str, $knownBoms, true);
  }
2878
2879
  /**
2880
   * Check if the string contains any html-tags <lall>.
2881
   *
2882
   * @param string $str <p>The input string.</p>
2883 2
   *
2884
   * @return boolean
2885 2
   */
2886
  public static function is_html($str)
  {
    $html = (string)$str;

    if ($html === '') {
      return false;
    }

    // Matches an opening, closing or self-closing tag, with optional attributes.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // 1 means "at least one tag found"; 0 (no match) and false (regex error) both mean "no".
    return preg_match($tagPattern, $html) === 1;
  }
2905
2906
  /**
2907
   * Try to check if "$str" is a JSON string.
2908
   *
2909
   * @param string $str <p>The input string.</p>
2910
   *
2911
   * @return bool
2912
   */
2913
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays count as JSON here; scalars are rejected.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // The decode must also have completed without an error.
    return json_last_error() === JSON_ERROR_NONE;
  }
2937
2938
  /**
2939 2
   * Check if the string is UTF-16.
2940
   *
2941
   * @param string $str <p>The input string.</p>
2942 2
   *
2943
   * @return int|false <p>
2944
   *                   <strong>false</strong> if it's not UTF-16,<br />
2945
   *                   <strong>1</strong> for UTF-16LE,<br />
2946
   *                   <strong>2</strong> for UTF-16BE.
2947
   *                   </p>
2948
   */
2949 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is considered a UTF-16 candidate.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders the same way: index 0 = little-endian, 1 = big-endian.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $score = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if ($decoded) {
        // Round-trip the decoded text; only a stable round-trip is scored.
        $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);

        if ($roundTrip === $decoded) {
          $strChars = self::count_chars($str, true);

          // NOTE(review): this looks up the keys of the decoded character table
          // among the *values* of $strChars; confirm against count_chars()
          // whether a key lookup was intended.
          foreach (self::count_chars($roundTrip, true) as $decodedChar => $unused) {
            if (in_array($decodedChar, $strChars, true) === true) {
              $score++;
            }
          }
        }
      }

      $scores[] = $score;
    }

    // A tie (including 0 : 0) is inconclusive.
    if ($scores[0] === $scores[1]) {
      return false;
    }

    return ($scores[0] > $scores[1]) ? 1 : 2;
  }
2997
2998
  /**
2999
   * Check if the string is UTF-32.
3000
   *
3001
   * @param string $str
3002
   *
3003
   * @return int|false <p>
3004
   *                   <strong>false</strong> if it's not UTF-32,<br />
3005
   *                   <strong>1</strong> for UTF-32LE,<br />
3006
   *                   <strong>2</strong> for UTF-32BE.
3007 1
   *                   </p>
3008
   */
3009 1 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is considered a UTF-32 candidate.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders the same way: index 0 = little-endian, 1 = big-endian.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $score = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if ($decoded) {
        // Round-trip the decoded text; only a stable round-trip is scored.
        $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);

        if ($roundTrip === $decoded) {
          $strChars = self::count_chars($str, true);

          // NOTE(review): this looks up the keys of the decoded character table
          // among the *values* of $strChars; confirm against count_chars()
          // whether a key lookup was intended.
          foreach (self::count_chars($roundTrip, true) as $decodedChar => $unused) {
            if (in_array($decodedChar, $strChars, true) === true) {
              $score++;
            }
          }
        }
      }

      $scores[] = $score;
    }

    // A tie (including 0 : 0) is inconclusive.
    if ($scores[0] === $scores[1]) {
      return false;
    }

    return ($scores[0] > $scores[1]) ? 1 : 2;
  }
3057
3058 1
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * The string is walked byte by byte with a small state machine: a lead byte announces how many
   * continuation bytes must follow, the code point is assembled from them and then checked for
   * over-long encodings, surrogates and values above U+10FFFF.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p><strong>true</strong> for an empty string or a well-formed UTF-8 string,
   *              <strong>false</strong> otherwise (including a multi-byte sequence cut off at the end).</p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // The former "preg_match('/^.{1}/us', ...)" shortcut was only reached when
    // pcre_utf8_support() reported that the /u modifier is NOT usable, so it could
    // never succeed. It was removed; the manual validation below covers that case too.

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets announced by the current lead octet

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // with "mbstring.func_overload" strlen() would count characters, so ask for bytes explicitly
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // We expect either a US-ASCII character or the first octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // $mState is non-zero: only a continuation octet (10xxxxxx) is acceptable here.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its 6 payload bits at the right position.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence, $mUcs4 holds the final code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset for the next character
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3209
3210
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Decodes a JSON string, after passing it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded.
   *                        </p>
   *                        <p>
   *                        This function only works with UTF-8 encoded strings.
   *                        </p>
   *                        <p>PHP implements a superset of
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                        only supports these values when they are nested inside an array or an object.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options. Currently only
   *                        <b>JSON_BIGINT_AS_STRING</b>
   *                        is supported (default is to cast large integers as floats).
   *                        Only forwarded when Bootup::is_php('5.4') is true.
   *                        </p>
   *
   * @return mixed the value encoded in <i>json</i> in appropriate
   * PHP type. Values true, false and
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
   * <i>json</i> cannot be decoded or if the encoded
   * data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = (string)self::filter($json);

    // without the 5.4 feature set the native function is called without "$options"
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3258 1
3259
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value, after passing it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   *                       <p>
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>PHP implements a superset of
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                       only supports these values when they are nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on
   *                       the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded when Bootup::is_php('5.5') is true.
   *                       </p>
   *
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // without the 5.5 feature set the native function is called without "$depth"
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3307
3308
  /**
   * Makes string's first char lowercase.
   *
   * The first character is taken with UTF8::substr(), lower-cased with UTF8::strtolower()
   * and glued back in front of the remainder of the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst($str)
  {
    $firstCharLower = self::strtolower(self::substr($str, 0, 1));

    return $firstCharLower . self::substr($str, 1);
  }
3319 1
3320 1
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped; when omitted (or empty), everything
   *                      in the Unicode categories "separator" (\pZ) and "other" (\pC) is stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    $useDefaultClass = ($chars === INF || !$chars);

    $pattern = $useDefaultClass
        ? '/^[\pZ\pC]+/u'
        : '/^' . self::rxClass($chars) . '+/u';

    return preg_replace($pattern, '', $str);
  }
3343 1
3344
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point than others,
   *                or an empty string if the input yields no code points.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // max() must not be called with an empty list: it warns on old PHP versions
    // and throws a ValueError on PHP 8.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3359
3360
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte lengths of the given chars, 0 if UTF8::chr_size_list() yields nothing.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3377 10
3378
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3393
3394
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string <p>The character with the lowest code point than others,
   *                or an empty string if the input yields no code points.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // min() must not be called with an empty list: it warns on old PHP versions
    // and throws a ValueError on PHP 8.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3409
3410
  /**
   * alias for "UTF8::normalize_encoding()"
   *
   * Both arguments are forwarded unchanged and the result is returned as-is.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3426
3427 45
  /**
   * Normalize the encoding-"name" input.
   *
   * "UTF-8" and every name listed in self::$ICONV_ENCODING are returned untouched. Anything else is
   * upper-cased, reduced to letters, digits and whitespace and looked up in a table of known aliases;
   * without a match the upper-cased input is returned. Results are memoized per input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string|mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.; $fallback if $encoding is empty.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    if (!$encoding) {
      return $fallback;
    }

    // already canonical: nothing to do
    if (
        'UTF-8' === $encoding
        ||
        in_array($encoding, self::$ICONV_ENCODING, true)
    ) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    $upperCased = strtoupper($encoding);
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $normalized = !empty($equivalences[$aliasKey]) ? $equivalences[$aliasKey] : $upperCased;

    // the cache is keyed by the caller's original spelling
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3484
3485 19
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // the key/value lists are split once and reused on later calls
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE === null) {
      $UTF8_MSWORD_KEYS_CACHE = array_keys(self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3511
3512 18
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a plain space;
   * unless requested otherwise, the entries of self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.
   *                                        Only a strict boolean <strong>true</strong> enables this.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The cache slot must be derived from the very same strict test that decides whether
    // the NO-BREAK SPACE entry is dropped. Casting the raw argument to int (as before) let
    // a call with e.g. 1 store the full table in the slot later used for "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3558
3559
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * Matching is done with the POSIX class [[:space:]] in UTF-8 mode.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $withoutWhitespace = preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on failure, the cast turns that into ''
    return (string)$withoutWhitespace;
  }
3579
3580
  /**
   * Format a number with grouped thousands.
   *
   * Separators longer than one byte are handled natively by PHP &gt;= 5.4. Only on older
   * versions the number is formatted with "." / "," first and the separators are swapped in
   * afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // The workaround is needed as soon as EITHER separator is longer than one byte,
    // and only where the native function cannot cope with that (PHP < 5.4).
    $needsWorkaround = (isset($thousands_sep[1]) || isset($dec_point[1]))
                       &&
                       version_compare(PHP_VERSION, '5.4.0', '<');

    if ($needsWorkaround) {
      // strtr() swaps both placeholders in one pass, so a "," inside $dec_point
      // is not replaced a second time (which sequential str_replace() would do).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3618
3619
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * If "\IntlChar" is reported as supported and yields a non-empty result, that result is used;
   * otherwise the code point is computed from up to four leading bytes and memoized per input.
   *
   * @param string      $chr      <p>The character of which to calculate code point.<p/>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // static cache, used if "\IntlChar" gave no usable result
    static $CHAR_CACHE = array();

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $viaIntl = \IntlChar::ord($chr);
      if ($viaIntl) {
        return $viaIntl;
      }
    }

    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // 1-based list of the (at most four) leading byte values
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $codepoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $codepoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $codepoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all)
      $codepoint = $lead;
    }

    return $CHAR_CACHE[$chr] = $codepoint;
  }
3681 2
3682
  /**
   * Parses the string into an array (into the the second parameter).
   *
   * WARNING: Instead of "parse_str()" this method do not (re-)placing variables in the current scope,
   *          if the second parameter is not set!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // success means: the parser did not fail AND something ended up in $result
    return $parsed !== false && !empty($result);
  }
3710 6
3711 6
  /**
   * Checks if the /u modifier is available that enables Unicode support in PCRE.
   *
   * The probe is an empty pattern with the /u modifier matched against an empty string;
   * errors are silenced on purpose.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3721 6
3722 6
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: a string of decimal digits is taken as-is,
   * a string of hexadecimal digits goes through UTF8::hex_to_int(), everything else
   * through UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range; empty if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start first, then end; bail out as soon as one of them is unusable
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codepoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codepoint = (int)self::hex_to_int($boundary);
      } else {
        $codepoint = self::ord($boundary);
      }

      if (!$codepoint) {
        return array();
      }

      $bounds[] = $codepoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3768
3769
  /**
3770
   * Multi decode html entity & fix urlencoded-win1252-chars.
3771
   *
3772
   * e.g:
3773
   * 'test+test'                     => 'test+test'
3774
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3775
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3776
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3777
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3778 14
   * 'Düsseldorf'                   => 'Düsseldorf'
3779
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3780 14
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3781
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3782
   *
3783 14
   * @param string $str          <p>The input string.</p>
3784 14
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3785 1
   *
3786 1
   * @return string
3787 13
   */
3788 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are rewritten as hexadecimal HTML entities
    // so that the entity decoding below can resolve them.
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', rawurldecode($str));
    }

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Decode once, or (with $multi_decode) until a pass no longer changes the string.
    do {
      $beforePass = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));
    } while ($multi_decode === true && $beforePass !== $str);

    return (string)$str;
  }
3819
3820 1
  /**
3821 1
   * alias for "UTF8::remove_bom()"
3822
   *
3823
   * @see UTF8::remove_bom()
3824 1
   *
3825 1
   * @param string $str
3826 1
   *
3827 1
   * @return string
3828
   *
3829 1
   * @deprecated
3830
   */
3831
  public static function removeBOM($str)
  {
    // Deprecated camelCase alias: the work is done by UTF8::remove_bom().
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3835 1
3836
  /**
3837
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
3838
   *
3839
   * @param string $str <p>The input string.</p>
3840
   *
3841
   * @return string <p>String without UTF-BOM</p>
3842
   */
3843
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // self::$BOM maps each BOM byte sequence to its length in bytes.
    foreach (self::$BOM as $bomString => $bomByteLength) {
      // '8BIT' makes the position check byte-wise: the BOM must be the very first bytes.
      if (0 === self::strpos($str, $bomString, 0, '8BIT')) {
        // UTF8::substr() can return false (e.g. when nothing is left after the BOM);
        // cast it so the remaining iterations and the caller always see a string.
        $str = (string)self::substr($str, $bomByteLength, null, '8BIT');

        if (!isset($str[0])) {
          return '';
        }
      }
    }

    return $str;
  }
3859 2
3860
  /**
3861 2
   * Removes duplicate occurrences of a string in another string.
3862 2
   *
3863
   * @param string          $str  <p>The base string.</p>
3864 2
   * @param string|string[] $what <p>String to search for in the base string.</p>
3865
   *
3866
   * @return string <p>The result string with removed duplicates.</p>
3867 2
   */
3868 2
  public static function remove_duplicates($str, $what = ' ')
  {
    // A single needle is handled like a one-element list.
    if (is_string($what) === true) {
      $what = array($what);
    }

    // Anything that is neither a string nor an array leaves $str untouched.
    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Collapse every run of consecutive occurrences into a single one.
        // The replacement is the captured group ("$1") instead of the raw needle:
        // a needle such as "$0" or "\0" would otherwise be interpreted as a
        // backreference by preg_replace() and the run would not be collapsed.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3883 2
3884 2
  /**
3885 2
   * Remove invisible characters from a string.
3886
   *
3887 2
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
3888
   *
3889
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
3890 2
   *
3891
   * @param string $str
3892
   * @param bool   $url_encoded
3893
   * @param string $replacement
3894
   *
3895
   * @return string
3896
   */
3897
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // Targets: every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09).
    if ($url_encoded) {
      // Percent-encoded hex digits may be written in either case ("%0b" / "%0B"),
      // hence the "i" modifier on both patterns.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass replaces nothing: removing one sequence can
    // bring the pieces of another one together (e.g. "%%0b0b").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3917
3918
  /**
3919
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
3920
   *
3921
   * @param string $str                <p>The input string</p>
3922
   * @param string $replacementChar    <p>The replacement character.</p>
3923
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
3924
   *
3925
   * @return string
3926
   */
3927
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // An empty replacement is passed to mb_substitute_character() as "none".
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // Swap in our substitute character for the UTF-8 -> UTF-8 conversion,
      // then restore whatever was configured before.
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    // Finally replace the replacement character itself (escaped and literal form).
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replacements = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($needles, $replacements, $str);
  }
3968
3969
  /**
3970
   * Strip whitespace or other characters from end of a UTF-8 string.
3971
   *
3972
   * @param string $str   <p>The string to be trimmed.</p>
3973
   * @param string $chars <p>Optional characters to be stripped.</p>
3974
   *
3975
   * @return string <p>The string with unwanted characters stripped from the right.</p>
3976
   */
3977 1 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      // No explicit character list: strip the Unicode categories \pZ and \pC.
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      // Explicit character list: let rxClass() build the matching expression.
      $pattern = '/' . self::rxClass($chars) . '+$/u';
    }

    return preg_replace($pattern, '', $str);
  }
3992
3993
  /**
3994
   * rxClass
3995
   *
3996
   * @param string $s
3997
   * @param string $class
3998
   *
3999
   * @return string
4000
   */
4001
  private static function rxClass($s, $class = '')
  {
    // Results are memoised per call-site input for the lifetime of the request.
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;
    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // $parts[0] collects the bracket expression; further entries become alternatives.
    $parts = array($class);

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // A dash is moved to the front of the bracket expression.
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($piece[2])) {
        // Pieces of at most two bytes are escaped for use inside the regex.
        $parts[0] .= preg_quote($piece, '/');
        continue;
      }

      if (1 === self::strlen($piece)) {
        // Longer byte sequences that still form a single character are added as-is.
        $parts[0] .= $piece;
        continue;
      }

      // Everything else becomes its own alternative.
      $parts[] = $piece;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    if (count($parts) === 1) {
      $result = $parts[0];
    } else {
      $result = '(?:' . implode('|', $parts) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $result;

    return $result;
  }
4041
4042
  /**
4043
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
4044
   */
4045 1
  public static function showSupport()
  {
    // Make sure the support table has been populated before dumping it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only the values are printed (not the keys), one per line.
    foreach (self::$SUPPORT as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
4055
4056
  /**
4057
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4058
   *
4059
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
4060
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4061
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
4062
   *
4063
   * @return string <p>The HTML numbered entity.</p>
4064
   */
4065 7
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;
    if (!isset($char[0])) {
      return '';
    }

    // Optionally pass ASCII input through unchanged.
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // Only non-default encodings need to be normalized.
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4088 1
4089
  /**
4090 1
   * Convert a string to an array of Unicode characters.
4091
   *
4092 1
   * @param string  $str       <p>The string to split into array.</p>
4093 1
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
4094 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4095 1
   *
4096
   * @return string[] <p>An array containing chunks of the string.</p>
4097 1
   */
4098
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    // Splits $str into characters (PCRE when available, otherwise a manual
    // byte decoder) and optionally groups them into chunks of $length characters.
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // collected characters
    $ret = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // NOTE: $cleanUtf8 is only honoured in this PCRE branch.
      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "u" = UTF-8 mode, "s" = the dot also matches newlines -> one match per character
      preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: decode the byte sequences by hand

      // NOTE(review): same check as a few lines above; presumably redundant here.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // byte length of the string, even when strlen() is overloaded by mbstring
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $len = \mb_strlen($str, '8BIT');
      } else {
        $len = strlen($str);
      }

      // Bytes that match none of the branches below, and lead bytes whose
      // continuation bytes do not match, are skipped without being added to $ret.
      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        // high bit not set (0xxxxxxx): single-byte character
        if (($str[$i] & "\x80") === "\x00") {

          $ret[] = $str[$i];

        // lead byte 110xxxxx: two-byte sequence
        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // the following byte must be a continuation byte (10xxxxxx)
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            // skip the consumed continuation byte
            $i++;
          }

        // lead byte 1110xxxx: three-byte sequence
        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            // skip the two consumed continuation bytes
            $i += 2;
          }

        // lead byte 11110xxx: four-byte sequence
        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            // skip the three consumed continuation bytes
            $i += 3;
          }

        }
      }
    }

    // group the single characters into chunks of $length characters each
    if ($length > 1) {
      $ret = array_chunk($ret, $length);

      return array_map(
          function ($item) {
            return implode('', $item);
          }, $ret
      );
    }

    // a leading empty element is treated as "nothing was split"
    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4213
4214
  /**
4215
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4216
   *
4217
   * @param string $str <p>The input string.</p>
4218 1
   *
4219
   * @return false|string <p>
4220 1
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4221
   *                      otherwise it will return false.
4222
   *                      </p>
4223
   */
4224
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // Each detector is evaluated only once; its result is mapped as 1 => LE, 2 => BE.
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    // third argument "true" = strict detection
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // An encoding is accepted when converting the string from that encoding
    // to itself leaves it unchanged (compared via MD5 of input and output).
    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4314 8
4315
  /**
4316 8
   * Check if the string ends with the given substring.
4317 8
   *
4318
   * @param string $haystack <p>The string to search in.</p>
4319 8
   * @param string $needle   <p>The substring to search for.</p>
4320 3
   *
4321
   * @return bool
4322
   */
4323 7 View Code Duplication
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // Compare the needle with the haystack's tail of the same character length.
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4338 7
4339
  /**
4340
   * Check if the string ends with the given substring, case insensitive.
4341
   *
4342
   * @param string $haystack <p>The string to search in.</p>
4343
   * @param string $needle   <p>The substring to search for.</p>
4344
   *
4345
   * @return bool
4346
   */
4347 View Code Duplication
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // Case-insensitive comparison of the needle with the haystack's tail of the same character length.
    $tail = self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4362
4363
  /**
4364
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4365 6
   *
4366
   * @link  http://php.net/manual/en/function.str-ireplace.php
4367
   *
4368
   * @param mixed $search  <p>
4369
   *                       Every replacement with search array is
4370
   *                       performed on the result of previous replacement.
4371
   *                       </p>
4372 6
   * @param mixed $replace <p>
4373
   *                       </p>
4374
   * @param mixed $subject <p>
4375
   *                       If subject is an array, then the search and
4376
   *                       replace is performed with every entry of
4377
   *                       subject, and the return value is an array as
4378
   *                       well.
4379
   *                       </p>
4380
   * @param int   $count   [optional] <p>
4381
   *                       The number of matched and replaced needles will
4382
   *                       be returned in count which is passed by
4383
   *                       reference.
4384
   *                       </p>
4385
   *
4386
   * @return mixed <p>A string or an array of replacements.</p>
4387 62
   */
4388
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a literal, case-insensitive UTF-8 pattern.
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // An empty needle must not match anything: this pattern can never match.
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() expands "$n" / "\n" in the replacement as backreferences,
    // which str_replace()-style functions must not do: escape "\" and "$" so
    // that the replacement is inserted literally.
    if (is_array($replace) === true) {
      $replacement = array();
      foreach ($replace as $key => $item) {
        $replacement[$key] = addcslashes((string)$item, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // reported back through the reference parameter

    return $subject;
  }
4406 61
4407 61
  /**
4408 1
   * Check if the string starts with the given substring, case insensitive.
4409
   *
4410
   * @param string $haystack <p>The string to search in.</p>
4411 61
   * @param string $needle   <p>The substring to search for.</p>
4412 2
   *
4413 2
   * @return bool
4414
   */
4415 61 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // The needle has to be found (case-insensitively) at position 0.
    return self::stripos($haystack, $needle) === 0;
  }
4430 1
4431
  /**
4432 1
   * Limit the number of characters in a string, but also after the next word.
4433
   *
4434
   * @param string $str
4435
   * @param int    $length
4436
   * @param string $strAddOn
4437
   *
4438
   * @return string
4439
   */
4440
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // Short enough already: nothing to cut, no add-on appended.
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The limit falls right after a word: drop the trailing space and append the add-on.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // Otherwise cut at the limit and discard the last (possibly broken) word.
    $str = self::substr($str, 0, $length);
    $words = explode(' ', $str);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // Nothing left after dropping the last word: fall back to a hard cut.
    return self::substr($str, 0, $length - 1) . $strAddOn;
  }
4471
4472
  /**
4473
   * Pad a UTF-8 string to given length with another string.
4474
   *
4475
   * @param string $str        <p>The input string.</p>
4476
   * @param int    $pad_length <p>The length of return string.</p>
4477
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4478
   * @param int    $pad_type   [optional] <p>
4479
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4480
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4481
   *                           </p>
4482
   *
4483
   * @return string <strong>Returns the padded string</strong>
4484
   */
4485 2
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    // Padding only happens for a positive integer target length that is
    // not shorter than the input; everything else returns the input as-is.
    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // An empty pad string cannot fill anything and would cause a division by zero below.
      if (!$ps_length) {
        return $str;
      }

      // number of characters that have to be added
      $diff = $pad_length - $str_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          // repeat the pad string often enough, then cut it to the exact size
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the rounded-down half, the right side the rounded-up half
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          // cast the result of the division (not only $diff) so that substr() gets an integer length
          $pre = self::substr($pre, 0, (int)floor($diff / 2));
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $post = self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4526
4527
  /**
   * Repeat a string.
   *
   * INFO: the input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>The string to be repeated.</p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           </p>
   *                           <p>
   *                           Has to be greater than or equal to 0; a multiplier of 0
   *                           results in an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4551
4552
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle; an array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement for found needles; an array may be used to
   *                       designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack (string or array). For an array, the replacement is
   *                       performed on every entry and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>A string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4585
4586
  /**
   * Replace only the first occurrence of "$search" in "$subject" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first hit.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject with the first hit replaced, or the untouched subject if nothing was found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    // no hit -> nothing to replace
    if ($firstHit === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstHit, self::strlen($search));
  }
4605
4606
  /**
   * Shuffles all the characters in the string.
   *
   * INFO: the string is split via UTF8::split(), shuffled and glued together again.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4621
4622 1
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4647 4
4648 2
  /**
   * Split a string into an array of grapheme clusters (or groups of them).
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>
   *                    Number of grapheme clusters per array element. Values smaller than 1 are
   *                    handed over to the native "str_split()".
   *                    </p>
   *
   * @return array <p>The chunks; an empty array for an empty input string.</p>
   */
  public static function str_split($str, $len = 1)
  {
    // init
    $str = (string)$str;
    $len = (int)$len;

    if ($str === '') {
      return array();
    }

    if ($len < 1) {
      // let the native function deal with the invalid length
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    // glue every "$len" consecutive grapheme clusters together
    $chunks = array();
    foreach (array_chunk($graphemes, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4692
4693
  /**
4694
   * Check if the string starts with the given substring.
4695
   *
4696
   * @param string $haystack <p>The string to search in.</p>
4697
   * @param string $needle   <p>The substring to search for.</p>
4698
   *
4699
   * @return bool
4700
   */
4701 View Code Duplication
  public static function str_starts_with($haystack, $needle)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
4702
  {
4703
    $haystack = (string)$haystack;
4704
    $needle = (string)$needle;
4705
4706
    if (!isset($haystack[0], $needle[0])) {
4707 1
      return false;
4708
    }
4709 1
4710
    if (self::strpos($haystack, $needle) === 0) {
4711
      return true;
4712
    }
4713
4714
    return false;
4715
  }
4716
4717
  /**
   * Get a binary representation of a specific string.
   *
   * INFO: The bytes of the string are read as one big-endian number and written with
   *       base 2 digits, without leading zeros. The conversion is done byte by byte,
   *       because "base_convert()" loses precision on large numbers and therefore
   *       returned wrong bits for longer strings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The bits as string of "0" and "1"; "0" for an empty (or all-zero) input.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '0';
    }

    $bits = '';
    foreach ((array)unpack('C*', $str) as $byte) {
      // every byte contributes exactly 8 bits
      $bits .= sprintf('%08b', $byte);
    }

    // same output format as before: no leading zeros, but at least one digit
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4732
4733 11
  /**
   * Convert a string into an array of words.
   *
   * INFO: The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result holds the
   *       words as well as the text between them.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charlist <p>Additional characters, handed over to UTF8::rxClass() together with "\pL".</p>
   *
   * @return array <p>The parts of the string; array('') for an empty input.</p>
   */
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ($str === '') {
      return array('');
    }

    $wordChars = self::rxClass($charlist, '\pL');
    $pattern = "/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u";

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4753
4754 3
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4769 1
4770
  /**
   * Counts number of words in the UTF-8 string.
   *
   * INFO: The work is done by UTF8::str_to_words(): in its result the words sit on the
   *       odd indexes, the text between them on the even ones.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see $format).</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      // the first word starts behind the leading non-word part
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
4813 10
4814
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are case-folded
   *       via UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4832
4833
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4850
4851
  /**
   * Case-sensitive string comparison.
   *
   * INFO: Identical strings are detected directly; everything else is compared
   *       in its NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: the very same string needs no normalization
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
4871
4872
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start of the examined part, handed over to UTF8::substr().</p>
   * @param int    $length   <p>Length of the examined part, handed over to UTF8::substr().</p>
   *
   * @return int|null <p>
   *                  The length of the segment, or <strong>null</strong> if the char-list
   *                  or the examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // only cut the string if the caller asked for a sub-part
    if ($offset || $length !== 2147483647) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no char of the list was found -> the whole string is the segment
    return self::strlen($str);
  }
4904
4905 11
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4922
4923
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints(); every entry is converted via UTF8::chr()
   *       and the results are concatenated in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4945
4946
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * INFO: the known byte order marks are the keys of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$BOM) as $bom) {
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4963
4964
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    return strip_tags($cleanUtf8 ? self::clean($str) : $str, $allowable_tags);
  }
4998
4999
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      if (is_bool($encoding)) {
        // INFO: the "bool"-check is only a fallback for old versions
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5067
5068 45
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * INFO: Order of the implementations: "mb_stristr()", then "grapheme_stristr()" (UTF-8 only),
   *       then a regex based fallback.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_stristr()" can't handle other encodings
    if (
        $isUtf8
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 holds everything in front of the first hit
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle
        ? $match[1]
        : self::substr($haystack, self::strlen($match[1]));
  }
5141
5142
  /**
   * Get the string length, not the byte-length!
   *
   * INFO: For the encodings "ASCII" and "CP850" the number of bytes is returned.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: this lazy check has to run before the first read of "self::$SUPPORT",
    //       which is already needed by the "ASCII" / "CP850" shortcut below
    //       (presumably "checkForSupport()" fills these entries -- it is not visible here)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        if (
            $encoding === 'CP850'
            &&
            self::$SUPPORT['mbstring_func_overload'] === false
        ) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "mbstring" is preferred; "iconv" is used for every encoding if "mbstring" is missing
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    // fallback via vanilla php
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5244 1
5245
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded
   *       via UTF8::strtocasefold() and then handed over to UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5261
5262 6
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp(); identical strings are detected directly,
   *       everything else is compared after UTF8::strtonatfold().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // shortcut: the very same string needs no folding
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5280 1
5281
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then handed over to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5298 1
5299 1
  /**
   * String comparison of the first n characters.
   *
   * INFO: both strings are shortened via UTF8::substr() and then compared via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // INFO: "UTF8::substr()" can return false, but "UTF8::strcmp()" expects strings,
    //       so a failed "substr" is compared as an empty string
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5319
5320
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>The characters to look for. This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      The rest of haystack, starting from the first character found,<br />
   *                      or false if none of the characters is found or one of the inputs is empty.
   *                      </p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // nothing to search in, or nothing to search for
    if (!isset($haystack[0]) || !isset($char_list[0])) {
      return false;
    }

    // build a character class from the list and look for its first hit
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // cut the haystack at the (byte-)position of the character that matched
    $bytePosition = strpos($haystack, $found[0]);

    return substr($haystack, $bytePosition);
  }
5345
5346
  /**
   * Find position of first occurrence of string in a string.
   *
   * The lookup is delegated to the first available backend, in this order:
   * plain "strpos()" (CP850 only), "iconv_strpos()" (non UTF-8 encodings without mbstring),
   * "mb_strpos()", "grapheme_strpos()", "iconv_strpos()" and finally a vanilla php fallback.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a code point has to be
    // converted *before* the cast to string below (afterwards the check can never match)
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither of the encoding-aware extensions is available
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the string: translate it into an
      // absolute character position, so that the result is relative to the whole haystack
      $offset = max(0, (int)self::strlen($haystack) + $offset);
    }

    $haystackTail = self::substr($haystack, $offset);
    if ($haystackTail === false) {
      // nothing left to search in
      return false;
    }

    $bytePosition = strpos($haystackTail, $needle);
    if ($bytePosition === false) {
      return false;
    }

    // convert the byte-position of the match into a character-position
    $charsBeforeMatch = self::strlen(substr($haystackTail, 0, $bytePosition));
    if ($charsBeforeMatch !== false) {
      return $offset + $charsBeforeMatch;
    }

    // fallback to "mb_"-function via polyfill (needs the complete haystack,
    // because $offset is applied by the function itself)
    return \mb_strpos($haystack, $needle, $offset, $encoding);
  }
5478
5479
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.<br />
   *                              <b>true</b>: everything from the beginning of haystack
   *                              up to the last occurrence of needle.<br />
   *                              <b>false</b>: everything from the last occurrence of needle
   *                              to the end of haystack.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use, "UTF-8" by default.
   *                              Any other value is passed through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding name needs to be normalized
    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong
      // result of the "mb_"-function, so remove them from both strings
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "mb_"-function, provided via polyfill if the extension is missing
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5518
5519 1
  /**
   * Reverses characters order in the string.
   *
   * The string is split via UTF8::split(), the resulting pieces are
   * reversed and glued together again.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with characters in the reverse sequence.</p>
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    // an empty string has nothing to reverse
    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversedChars = array_reverse($chars);

    return implode('', $reversedChars);
  }
5536
5537
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.<br />
   *                              <b>true</b>: everything from the beginning of haystack
   *                              up to the last occurrence of needle.<br />
   *                              <b>false</b>: everything from the last occurrence of needle
   *                              to the end of haystack.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use, "UTF-8" by default.
   *                              Any other value is passed through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding name needs to be normalized
    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong
      // result of the "mb_"-function, so remove them from both strings
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5575 2
5576
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Uses "mb_strripos()" if mbstring is available, otherwise "grapheme_strripos()" (UTF-8 only)
   * and as last resort UTF8::strrpos() on the upper-cased input.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />
   *                   If needle is not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is taken as code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: a "bool" is only accepted as fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased variants of both strings
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5656
5657 3
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Uses "mb_strrpos()" if mbstring is available, otherwise "grapheme_strrpos()" (UTF-8 only)
   * and as last resort a vanilla php fallback.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. "null" is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is taken as code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset > 0) {
      // skip the first $offset characters, they are added to the result again
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // ignore the last characters, the result stays relative to the beginning
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    // UTF8::substr() can signal an error with "false": then there is nothing to search in
    if ($haystack === false) {
      return false;
    }

    $bytePosition = strrpos($haystack, $needle);
    if ($bytePosition === false) {
      return false;
    }

    // convert the byte-position of the match into a character-position
    return $offset + (int)self::strlen(substr($haystack, 0, $bytePosition));
  }
5751 7
5752 7
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars.</p>
   * @param int    $offset [optional] <p>Start of the part of the string that is examined.</p>
   * @param int    $length [optional] <p>Length of the part of the string that is examined.</p>
   *
   * @return int <p>Number of leading characters that are part of the mask, 0 if there are none.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // init
    $offset = (int)$offset;
    $length = (int)$length;

    // only cut the string, if something other than the defaults was requested
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the longest run of mask-characters at the very beginning
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $leadingRun)) {
      return 0;
    }

    return self::strlen($leadingRun[0]);
  }
5780
5781 7
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Uses "mb_strstr()" if mbstring is available, otherwise "grapheme_strstr()" (UTF-8 only)
   * and as last resort a regular expression.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to search in, or nothing to search for
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a
      // wrong result, so remove them from both strings
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: capture (non-greedy) everything in front of the first needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $partBeforeNeedle = $match[1];
    if ($before_needle) {
      return $partBeforeNeedle;
    }

    // skip as many characters as were found in front of the needle
    return self::substr($haystack, self::strlen($partBeforeNeedle));
  }
5853
5854
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is run through the common case-folding table (self::$COMMON_CASE_FOLD), optionally through
   * the full case-folding table ("caseFolding_full" data file), optionally cleaned, and finally lower-cased
   * via self::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace full case folding chars (default)<br />
   *                           <b>false</b>, use only the limited static array [self::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded string, or an empty string for empty input.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // search / replace lists of the common table, built once per request
    static $commonFoldCache = null;

    if ($commonFoldCache === null) {
      $commonFoldCache = array(
          'search'  => array_keys(self::$COMMON_CASE_FOLD),
          'replace' => array_values(self::$COMMON_CASE_FOLD),
      );
    }

    $folded = str_replace($commonFoldCache['search'], $commonFoldCache['replace'], $str);

    if ($full) {

      // the full table is only loaded when it is requested for the first time
      static $fullFoldTable = null;

      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    // cleaning happens after the table replacements, right before lower-casing
    return self::strtolower($cleanUtf8 === true ? self::clean($folded) : $folded);
  }
5905
5906
  /**
   * Make a string lowercase.
   *
   * Without $lang the string is passed to \mb_strtolower(). With $lang (and intl + PHP >= 5.4) the
   * transliterator "<lang>-Lower" is used if it is listed in the supported transliterator ids,
   * otherwise "Any-Lower". If $lang is given but intl / PHP >= 5.4 is missing, an E_USER_WARNING is
   * triggered and \mb_strtolower() is used instead.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string  $str       <p>The string being lowercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   * @param string|null $lang  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean non UTF-8 chars before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // prefer the language specific rule set, fall back to the generic one
        $langCode = $lang . '-Lower';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          $langCode = 'Any-Lower';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the language specific conversion is not available: warn and continue with the generic conversion
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter', E_USER_WARNING);
    }

    return \mb_strtolower($str, $encoding);
  }
5961
5962
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5974
5975
  /**
   * Make a string uppercase.
   *
   * Without $lang the string is passed to \mb_strtoupper(). With $lang (and intl + PHP >= 5.4) the
   * transliterator "<lang>-Upper" is used if it is listed in the supported transliterator ids,
   * otherwise "Any-Upper". If $lang is given but intl / PHP >= 5.4 is missing, an E_USER_WARNING is
   * triggered and \mb_strtoupper() is used instead.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string  $str       <p>The string being uppercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   * @param string|null $lang  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean non UTF-8 chars before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // prefer the language specific rule set, fall back to the generic one
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the language specific conversion is not available: warn and continue with the generic conversion
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter', E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6029
6030
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>
   *                              With $to: the string replacing from.<br />
   *                              Without $to: an array (search => replacement) that is passed to strtr()
   *                              unchanged, or a plain string whose occurrences are removed from $str.
   *                              </p>
   * @param string|string[] $to   [optional] <p>The string being translated to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // NOTE(review): self::str_split() is expected to return one array entry per character - confirm
      $from = self::str_split($from);
      $to = self::str_split($to);

      // entries are paired one-to-one, surplus entries on the longer side are ignored
      $pairCount = min(count($from), count($to));

      if ($pairCount === 0) {
        // nothing to translate; this also avoids array_combine() on empty arrays,
        // which returns false before PHP 5.4
        return (string)$str;
      }

      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    }

    if (is_string($from)) {
      // the two-argument form of strtr() only accepts an array of pairs,
      // so a plain string without replacement is simply removed from $str
      return str_replace($from, '', (string)$str);
    }

    return strtr($str, $from);
  }
6063
6064
  /**
   * Return the width of a string.
   *
   * The encoding is normalized (unless it already is "UTF-8"), the string is optionally cleaned
   * and the result of \mb_strwidth() is returned.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    // iconv and mbstring are not tolerant to invalid encoding,
    // so invalid chars are removed up front when requested
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    // no support check here: the "mb_"-function is called directly (fallback via polyfill)
    return \mb_strwidth($input, $charset);
  }
6088 6
6089
  /**
   * Get part of a string.
   *
   * The first available backend is used: byte-wise substr() for "CP850" (only if mbstring function
   * overloading is off), then mbstring, then intl (grapheme), then iconv and finally a pure PHP fallback.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      Returns a sub-string specified by the start and length parameters, an empty string
   *                      for empty input, or <b>false</b> if $start is greater than the length of the string.
   *                      </p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the length of the string is only needed for the range check of $start
    // and as default value for a missing $length
    $str_length = 0;
    if ($start || $length === null) {
      $str_length = (int)self::strlen($str, $encoding);
    }

    if ($start && $start > $str_length) {
      return false;
    }

    // from here on $length is always an integer
    if ($length === null) {
      $length = $str_length;
    } else {
      $length = (int)$length;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      // byte-wise substr(), only used while it is not overloaded by mbstring
      return substr($str, $start, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $start, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_substr($str, $start, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $start, $length));
  }
6192
6193
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * $main_str is cut via self::substr($main_str, $offset, $length), $str is cut to the length of that
   * result, and both parts are compared via self::strcasecmp() or self::strcmp().
   *
   * @param string  $main_str           <p>The main string being compared.</p>
   * @param string  $str                <p>The secondary string being compared.</p>
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
   *                                    the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>The result of self::strcasecmp() / self::strcmp() for the two compared parts.</p>
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // self::substr() returns false if $offset is greater than the length of the string,
    // so the result is cast to make sure that only strings reach the helpers below
    $main_str = (string)self::substr($main_str, $offset, $length);
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
6214 8
6215
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. <b>null</b> (default) means "up to the end of the haystack".
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   This functions returns an integer or false if there isn't a string
   *                   (empty haystack or needle, or offset + length <= 0 on PHP < 7.1).
   *                   </p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // strict null-check: an explicit length of "0" must not be handled like "no length given"
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // a missing length counts as "0" for this guard
      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // keep "null" as it is, so that self::substr() reads up to the end of the haystack;
      // casting it to "0" would cut the haystack down to an empty string
      if ($length !== null) {
        $length = (int)$length;
      }

      // self::substr() returns false if $offset is greater than the length of the haystack
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // clean non UTF-8 chars from both strings before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6289
6290
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * Both arguments are cast to string first. An empty haystack yields an empty string; an empty needle, or a needle
   * the haystack does not start with, leaves the haystack unchanged.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the leading needle, or the unchanged haystack.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: no needle given, or the haystack does not begin with it
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
6318
6319
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * Both arguments are cast to string first. An empty haystack yields an empty string; an empty needle, or a needle
   * the haystack does not end with, leaves the haystack unchanged.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the trailing needle, or the unchanged haystack.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: no needle given, or the haystack does not end with it
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6347
6348
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * Both arguments are cast to string first. An empty haystack yields an empty string; an empty needle, or a needle
   * the haystack does not start with, leaves the haystack unchanged.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the leading needle, or the unchanged haystack.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: no needle given, or the haystack does not begin with it
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
6376
6377
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * The string is split into characters with a UTF-8 aware regex, the requested range is spliced out and the
   * characters of the replacement are spliced in.
   *
   * If $str is an array, the function is applied to every element and an array is returned; scalar $replacement,
   * $start and $length values are repeated for every element, array values are matched up by position.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings. If $str is a string
   *                                          and an array is given, its first element is used.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          <br /><br />
   *                                          Non-integer entries of a start array are treated as 0.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If length is zero then this function will have the
   *                                          effect of inserting replacement into string at the given start offset.
   *                                          <br /><br />
   *                                          If it is not given and $str is a string, it defaults to the length of
   *                                          the string, i.e. the replacing ends at the end of string.
   *                                          <br /><br />
   *                                          NOTE: if it is not given and $str is an array, a length of 0 is used
   *                                          for every element (pure insertion).</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // $replacement: one entry per input string
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one entry per input string, anything that is not an integer becomes 0
      if (is_array($start) === true) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one entry per input string
      if (!isset($length)) {
        // no length given: insert only (length 0) for every element
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer entries fall back to the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $start, $length);

    }

    if (is_array($replacement) === true) {
      // A single string can only take a single replacement: use the first element of the array,
      // whatever its key is (reading index 0 would fail for arrays that do not start at key 0).
      $replacement = count($replacement) > 0 ? reset($replacement) : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into single UTF-8 characters
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6473
6474
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * Both arguments are cast to string first. An empty haystack yields an empty string; an empty needle, or a needle
   * the haystack does not end with, leaves the haystack unchanged.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the trailing needle, or the unchanged haystack.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: no needle given, or the haystack does not end with it
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6501
6502
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the character (it already was upper
   * case, or has no case), the lower-cased character is used instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the regex below runs in UTF-8 mode, so clean the input first if the caller asked for it
      $str = self::clean($str);
    }

    // swaps the case of the single character in $match[0]
    $swapSingleChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by upper-casing => try the lower case variant
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapSingleChar, $str);
  }
6545
6546
  /**
   * Deprecated alias for "UTF8::to_ascii()"; all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed as the "unknown" replacement character.</p>
   * @param bool   $strict    <p>Passed as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    return self::to_ascii(
        $s,
        $subst_chr,
        $strict
    );
  }
6563
6564
  /**
   * Deprecated alias for "UTF8::to_iso8859()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toIso8859($str)
  {
    return self::to_iso8859(
        $str
    );
  }
6579
6580
  /**
   * Deprecated alias for "UTF8::to_latin1()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toLatin1($str)
  {
    return self::to_latin1(
        $str
    );
  }
6595
6596
  /**
   * Deprecated alias for "UTF8::to_utf8()"; the argument is passed through unchanged
   * (html-entity decoding stays at its default).
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toUTF8($str)
  {
    return self::to_utf8(
        $str
    );
  }
6611
6612
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first. If it is not pure ASCII afterwards, every non-ASCII character is decoded to its
   * code point and looked up in a transliteration table loaded via "self::getData()"; characters without a table
   * entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // transliteration tables loaded so far, indexed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // static analysis reports that clean() may return false; the cast keeps $str (and the return value) a string
    $str = (string)self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: keep the string we have
        // and fall through to the table based conversion in that case
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }

      }
    }

    // split into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = isset($ar[0]) ? $ar[0] : array();
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // $ord is reset for every character, so a lead byte that matches no branch
      // (128-191, 254, 255) can never pick up the code point of the previous character.
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // not a usable lead byte
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into banks of 256 code points; load the bank on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // position of the code point inside its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference left behind by the foreach
    unset($c);

    return implode('', $chars);
  }
6767
6768
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keeping their keys. Scalars are cast to string; an
   * empty string is returned as is, everything else is passed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    return self::utf8_decode($str);
  }
6797
6798
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    return self::to_iso8859(
        $str
    );
  }
6811
6812
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * The input is scanned byte by byte: a lead byte followed by the announced number of continuation bytes is copied
   * as is, bytes 0x80-0xBF with a Windows-1252 mapping are replaced by that mapping, every other byte >= 0x80 is
   * re-encoded as a two byte UTF-8 sequence, and bytes below 0x80 are copied.
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // length in bytes, also if "strlen" is overloaded by mbstring
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {

      $c1 = $str[$i];

      if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes does this lead byte announce? (0 = doesn't look like UTF8)
        if ($c1 <= "\xDF") {
          $need = 1;
        } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") {
          $need = 2;
        } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") {
          $need = 3;
        } else {
          $need = 0;
        }

        // all announced bytes must exist and be continuation bytes (0x80-0xBF)
        $sequence = $c1;
        $isUtf8 = $need > 0 && $i + $need < $max;
        for ($k = 1; $isUtf8 && $k <= $need; $k++) {
          $next = $str[$i + $k];
          $isUtf8 = $next >= "\x80" && $next <= "\xBF";
          $sequence .= $next;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $need;
          continue;
        }

        // not valid UTF8 - convert it (below)

      } elseif (($c1 & "\xC0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$WIN1252_TO_UTF8[$ordC1];
          continue;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
        continue;
      }

      // re-encode the single byte as a two byte UTF-8 sequence
      $cc1 = self::chr_and_parse_int(ord($c1) / 64) | "\xC0";
      $cc2 = ($c1 & "\x3F") | "\x80";
      $buf .= $cc1 . $cc2;
    }

    // decode unicode escape sequences
    $decodeEscape = function ($match) {
      return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
    };
    $buf = preg_replace_callback('/\\\\u([0-9a-f]{4})/i', $decodeEscape, $buf);

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6949
6950
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without a character list, all leading and trailing characters of the unicode categories "separator" (\pZ) and
   * "other" (\pC) are removed. With a character list, the work is delegated to "UTF8::ltrim()" and "UTF8::rtrim()".
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The string "0" is a valid character list, but it is falsy in PHP,
    // so it has to be excluded from the "no characters given" check.
    $noCharsGiven = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharsGiven) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6978
6979
  /**
   * Converts the first character of a string to uppercase and leaves the rest untouched.
   *
   * NOTE(review): static analysis reports that self::substr() may return false; in that case the
   * value is passed on to self::strtoupper() / the concatenation as-is - confirm this is intended.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Charset that is forwarded to the substr / strtoupper helpers.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // first character, uppercased ...
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $upperFirst = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    // ... followed by everything after it
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    return $upperFirst . $remainder;
  }
6992
6993
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The word whose first character should be uppercased.</p>
   * @param string  $encoding  [optional] <p>Charset, forwarded to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean-flag, forwarded to "UTF8::ucfirst()".</p>
   *
   * @return string <p>The result of "UTF8::ucfirst()".</p>
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7008
7009
  /**
   * Uppercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-excluded word is passed through
   * "UTF8::ucfirst()" and the pieces are concatenated again without any glue.
   * (Presumably "str_to_words()" also returns the separators between the words - confirm there.)
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The converted string, or an empty string for empty input.</p>
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Check for the empty string explicitly: a truthiness test would also reject the string "0".
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Skip only genuinely empty fragments; "0" is a valid word and must stay in the output.
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7058
7059
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each pass runs: to_utf8() -> html_entity_decode() -> native urldecode() -> fix_simple_utf8().
   *
   * NOTE(review): static analysis reports that self::to_utf8() may also return an array while
   * self::html_entity_decode() expects a string; the input is cast to string up front - confirm
   * that this is sufficient.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Repeat the decoding until the string no longer changes.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // turn "%uXXXX" escapes into hexadecimal html entities, so the entity decoder below handles them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced when the version check succeeds
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    while (true) {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

      // one pass only if multi decoding is off, otherwise stop once nothing changed anymore
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return (string)$str;
  }
7109
7110
  /**
   * Return an array that maps "urlencoded"-win1252 sequences ("%20" - "%FF") to UTF-8 strings.
   *
   * NOTE(review): "%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0" and "%AD" map to an empty string here.
   * "%A0" (NO-BREAK SPACE) and "%AD" (SOFT HYPHEN) are defined in Windows-1252, so these two may have
   * contained an invisible character originally - confirm before relying on them.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array <p>Percent-encoded sequence as key, decoded UTF-8 string as value.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (see key 128 of self::$WIN1252_TO_UTF8), not a backtick
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7346
7347
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalized via "UTF8::to_utf8()" and run through the UTF8_TO_WIN1252
   * replacement table. Afterwards the string is rewritten in place, byte by byte:
   * 2-byte sequences become the matching single byte (or "?" for code points >= 256),
   * 3- and 4-byte sequences become "?", every other byte is copied as it is.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // keys / values of the replacement table are split only once per request
    static $searchCache = null;
    static $replaceCache = null;

    if (null === $searchCache) {
      $searchCache = array_keys(self::$UTF8_TO_WIN1252);
      $replaceCache = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($searchCache, $replaceCache, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "strlen()" is overloaded by mbstring
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $read = 0;  // offset of the byte that is currently decoded
    $write = 0; // offset where the next decoded byte is stored (never ahead of $read)

    while ($read < $byteCount) {
      $highNibble = $str[$read] & "\xF0";

      if ("\xC0" === $highNibble || "\xD0" === $highNibble) {
        // 2-byte sequence: 5 payload bits from the lead byte + 6 bits from the following byte
        $leadBits = ord($str[$read] & "\x1F");
        ++$read;
        $codePoint = ($leadBits << 6) | ord($str[$read] & "\x3F");
        $str[$write] = $codePoint < 256 ? self::chr_and_parse_int($codePoint) : '?';
      } elseif ("\xF0" === $highNibble) {
        // 4-byte sequence: not representable, skip the three following bytes
        $str[$write] = '?';
        $read += 3;
      } elseif ("\xE0" === $highNibble) {
        // 3-byte sequence: not representable, skip the two following bytes
        $str[$write] = '?';
        $read += 2;
      } else {
        $str[$write] = $str[$read];
      }

      ++$read;
      ++$write;
    }

    // drop the leftover bytes behind the last written position
    return self::substr($str, 0, $write, '8BIT');
  }
7409
7410
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "\utf8_encode()" call, the CP1252_TO_UTF8 replacement table is applied,
   * but only if the encoded string contains a 0xC2 byte at all.
   * (Presumably every key of that table starts with 0xC2 - confirm at the table definition.)
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // keys / values of the replacement table are split only once per request
    static $searchCache = null;
    static $replaceCache = null;

    $input = (string)$str;

    if ('' === $input) {
      return '';
    }

    $encoded = \utf8_encode($input);

    // no 0xC2 byte -> skip the replacement table completely
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $searchCache) {
      $searchCache = array_keys(self::$CP1252_TO_UTF8);
      $replaceCache = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
7442
7443
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7456
7457
  /**
   * Returns the internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               The whitespace characters as values, with the type of whitespace
   *               (as defined in the URL above) as keys.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7473
7474
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string contains more words than
   * allowed, it is cut after the last allowed word, right-trimmed and "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer; values below 1 result in an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string <p>The (maybe shortened) string.</p>
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if ('' === $text) {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 to $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $text, $found);

    // shorten only if something matched and the match does not already cover the whole string
    if (isset($found[0]) && self::strlen($text) !== self::strlen($found[0])) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $text;
  }
7509
7510
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a single-byte "mask" ("#" for an existing break, " " for a
   * space, "?" for every other character), the mask is wrapped by the native "wordwrap()" and
   * the break positions found in the mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column; an empty string if the
   *                input or the break is empty.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // build the list of real characters and the matching mask
    $mask = '';
    $chars = array();
    foreach (explode($break, $str) as $segmentIndex => $segment) {

      // every segment after the first one was preceded by a break in the input
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = self::strpos($wrappedMask, '#', 0);

    while (false !== $breakPos) {

      // copy the characters of the current line
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // the break replaces an original break or a space, so that character is dropped
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;

      $breakPos = self::strpos($wrappedMask, '#', $breakPos + 1);
    }

    // everything behind the last break
    return $wrapped . implode('', $chars);
  }
7577
7578
  /**
   * Returns the internal list of Unicode White Space characters.
   *
   * @return array <p>An array with the numeric code point as key and the White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7587
7588
}
7589