Completed
Push — master ( bec26a...7fb737 )
by Lars
03:55
created

UTF8::range()   D

Complexity

Conditions 9
Paths 22

Size

Total Lines 38
Code Lines 24

Duplication

Lines 14
Ratio 36.84 %

Code Coverage

Tests 0
CRAP Score 90

Importance

Changes 0
Metric Value
dl 14
loc 38
ccs 0
cts 6
cp 0
rs 4.909
c 0
b 0
f 0
cc 9
eloc 24
nc 22
nop 2
crap 90
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  /**
15
   * @var array
16
   */
17
  private static $WIN1252_TO_UTF8 = array(
18
      128 => "\xe2\x82\xac", // EURO SIGN
19
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
20
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
21
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
22
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
23
      134 => "\xe2\x80\xa0", // DAGGER
24
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
25
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
26
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
27
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
28
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
29
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
30
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
31
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
32
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
33
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
34
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
35
      149 => "\xe2\x80\xa2", // BULLET
36
      150 => "\xe2\x80\x93", // EN DASH
37
      151 => "\xe2\x80\x94", // EM DASH
38
      152 => "\xcb\x9c", // SMALL TILDE
39
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
40
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
41
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
42
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
43
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
44
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
45
  );
46
47
  /**
48
   * @var array
49
   */
50
  private static $CP1252_TO_UTF8 = array(
51
      '€' => '€',
52
      '‚' => '‚',
53
      'ƒ' => 'ƒ',
54
      '„' => '„',
55
      '…' => '…',
56
      '†' => '†',
57
      '‡' => '‡',
58
      'ˆ' => 'ˆ',
59
      '‰' => '‰',
60
      'Š' => 'Š',
61
      '‹' => '‹',
62
      'Œ' => 'Œ',
63
      'Ž' => 'Ž',
64
      '‘' => '‘',
65
      '’' => '’',
66
      '“' => '“',
67
      '”' => '”',
68
      '•' => '•',
69
      '–' => '–',
70
      '—' => '—',
71
      '˜' => '˜',
72
      '™' => '™',
73
      'š' => 'š',
74
      '›' => '›',
75
      'œ' => 'œ',
76
      'ž' => 'ž',
77
      'Ÿ' => 'Ÿ',
78
  );
79
80
  /**
81
   * Bom => Byte-Length
82
   *
83
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
84
   *
85
   * @var array
86
   */
87
  private static $BOM = array(
88
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
89
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
90
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
91
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
94
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
95
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
96
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
97
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
98
  );
99
100
  /**
101
   * Numeric code point => UTF-8 Character
102
   *
103
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
104
   *
105
   * @var array
106
   */
107
  private static $WHITESPACE = array(
108
    // NUL Byte
109
    0     => "\x0",
110
    // Tab
111
    9     => "\x9",
112
    // New Line
113
    10    => "\xa",
114
    // Vertical Tab
115
    11    => "\xb",
116
    // Carriage Return
117
    13    => "\xd",
118
    // Ordinary Space
119
    32    => "\x20",
120
    // NO-BREAK SPACE
121
    160   => "\xc2\xa0",
122
    // OGHAM SPACE MARK
123
    5760  => "\xe1\x9a\x80",
124
    // MONGOLIAN VOWEL SEPARATOR
125
    6158  => "\xe1\xa0\x8e",
126
    // EN QUAD
127
    8192  => "\xe2\x80\x80",
128
    // EM QUAD
129
    8193  => "\xe2\x80\x81",
130
    // EN SPACE
131
    8194  => "\xe2\x80\x82",
132
    // EM SPACE
133
    8195  => "\xe2\x80\x83",
134
    // THREE-PER-EM SPACE
135
    8196  => "\xe2\x80\x84",
136
    // FOUR-PER-EM SPACE
137
    8197  => "\xe2\x80\x85",
138
    // SIX-PER-EM SPACE
139
    8198  => "\xe2\x80\x86",
140
    // FIGURE SPACE
141
    8199  => "\xe2\x80\x87",
142
    // PUNCTUATION SPACE
143
    8200  => "\xe2\x80\x88",
144
    // THIN SPACE
145
    8201  => "\xe2\x80\x89",
146
    //HAIR SPACE
147
    8202  => "\xe2\x80\x8a",
148
    // LINE SEPARATOR
149
    8232  => "\xe2\x80\xa8",
150
    // PARAGRAPH SEPARATOR
151
    8233  => "\xe2\x80\xa9",
152
    // NARROW NO-BREAK SPACE
153
    8239  => "\xe2\x80\xaf",
154
    // MEDIUM MATHEMATICAL SPACE
155
    8287  => "\xe2\x81\x9f",
156
    // IDEOGRAPHIC SPACE
157
    12288 => "\xe3\x80\x80",
158
  );
159
160
  /**
161
   * @var array
162
   */
163
  private static $WHITESPACE_TABLE = array(
164
      'SPACE'                     => "\x20",
165
      'NO-BREAK SPACE'            => "\xc2\xa0",
166
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
167
      'EN QUAD'                   => "\xe2\x80\x80",
168
      'EM QUAD'                   => "\xe2\x80\x81",
169
      'EN SPACE'                  => "\xe2\x80\x82",
170
      'EM SPACE'                  => "\xe2\x80\x83",
171
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
172
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
173
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
174
      'FIGURE SPACE'              => "\xe2\x80\x87",
175
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
176
      'THIN SPACE'                => "\xe2\x80\x89",
177
      'HAIR SPACE'                => "\xe2\x80\x8a",
178
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
179
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
180
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
181
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
182
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
183
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
184
  );
185
186
  /**
187
   * bidirectional text chars
188
   *
189
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
190
   *
191
   * @var array
192
   */
193
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
194
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
195
    8234 => "\xE2\x80\xAA",
196
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
197
    8235 => "\xE2\x80\xAB",
198
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
199
    8236 => "\xE2\x80\xAC",
200
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
201
    8237 => "\xE2\x80\xAD",
202
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
203
    8238 => "\xE2\x80\xAE",
204
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
205
    8294 => "\xE2\x81\xA6",
206
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
207
    8295 => "\xE2\x81\xA7",
208
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
209
    8296 => "\xE2\x81\xA8",
210
    // POP DIRECTIONAL ISOLATE
211
    8297 => "\xE2\x81\xA9",
212
  );
213
214
  /**
215
   * @var array
216
   */
217
  private static $COMMON_CASE_FOLD = array(
218
      'ſ'            => 's',
219
      "\xCD\x85"     => 'ι',
220
      'ς'            => 'σ',
221
      "\xCF\x90"     => 'β',
222
      "\xCF\x91"     => 'θ',
223
      "\xCF\x95"     => 'φ',
224
      "\xCF\x96"     => 'π',
225
      "\xCF\xB0"     => 'κ',
226
      "\xCF\xB1"     => 'ρ',
227
      "\xCF\xB5"     => 'ε',
228
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
229
      "\xE1\xBE\xBE" => 'ι',
230
  );
231
232
  /**
233
   * @var array
234
   */
235
  private static $BROKEN_UTF8_FIX = array(
236
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
237
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
238
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
239
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
240
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
241
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
242
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
243
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
244
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
245
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
246
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
247
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
248
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
249
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
250
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
251
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
252
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
253
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
254
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
255
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
256
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
257
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
258
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
259
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
260
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
261
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
262
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
263
      'ü'       => 'ü',
264
      'ä'       => 'ä',
265
      'ö'       => 'ö',
266
      'Ö'       => 'Ö',
267
      'ß'       => 'ß',
268
      'Ã '       => 'à',
269
      'á'       => 'á',
270
      'â'       => 'â',
271
      'ã'       => 'ã',
272
      'ù'       => 'ù',
273
      'ú'       => 'ú',
274
      'û'       => 'û',
275
      'Ù'       => 'Ù',
276
      'Ú'       => 'Ú',
277
      'Û'       => 'Û',
278
      'Ü'       => 'Ü',
279
      'ò'       => 'ò',
280
      'ó'       => 'ó',
281
      'ô'       => 'ô',
282
      'è'       => 'è',
283
      'é'       => 'é',
284
      'ê'       => 'ê',
285
      'ë'       => 'ë',
286
      'À'       => 'À',
287
      'Á'       => 'Á',
288
      'Â'       => 'Â',
289
      'Ã'       => 'Ã',
290
      'Ä'       => 'Ä',
291
      'Ã…'       => 'Å',
292
      'Ç'       => 'Ç',
293
      'È'       => 'È',
294
      'É'       => 'É',
295
      'Ê'       => 'Ê',
296
      'Ë'       => 'Ë',
297
      'ÃŒ'       => 'Ì',
298
      'Í'       => 'Í',
299
      'ÃŽ'       => 'Î',
300
      'Ï'       => 'Ï',
301
      'Ñ'       => 'Ñ',
302
      'Ã’'       => 'Ò',
303
      'Ó'       => 'Ó',
304
      'Ô'       => 'Ô',
305
      'Õ'       => 'Õ',
306
      'Ø'       => 'Ø',
307
      'Ã¥'       => 'å',
308
      'æ'       => 'æ',
309
      'ç'       => 'ç',
310
      'ì'       => 'ì',
311
      'í'       => 'í',
312
      'î'       => 'î',
313
      'ï'       => 'ï',
314
      'ð'       => 'ð',
315
      'ñ'       => 'ñ',
316
      'õ'       => 'õ',
317
      'ø'       => 'ø',
318
      'ý'       => 'ý',
319
      'ÿ'       => 'ÿ',
320
      '€'      => '€',
321
      '’'      => '’',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $UTF8_TO_WIN1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $UTF8_MSWORD = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $ICONV_ENCODING = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $SUPPORT = array();
808
809 1
  /**
   * Constructor.
   *
   * Creating an instance triggers the (one-time) environment detection
   * done by UTF8::checkForSupport().
   */
  public function __construct()
  {
    // the class is final, so late static binding resolves to this very class
    static::checkForSupport();
  }
816
817
  /**
   * Pick one character out of a string by its position ($str[1] like functionality).
   *
   * The lookup itself is delegated to UTF8::substr() with a length of 1.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string if the
   *                input is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;
    $offset = (int)$pos;

    // nothing can be fetched from an empty string or from a negative position
    if ($input === '' || $offset < 0) {
      return '';
    }

    return self::substr($input, $offset, 1);
  }
840 1
841
  /**
   * Make sure a string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report "false"
   *       is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // a BOM is already there: return the input as it is
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
858
859
  /**
   * Convert a binary number, given as a string of "0" and "1", into a string of bytes.
   *
   * The input is interpreted as ONE big binary number: leading zeros carry no
   * information and characters other than "0" / "1" are ignored (this mirrors
   * what base_convert() did in the previous implementation).
   *
   * INFO: The conversion is done byte by byte instead of via base_convert(),
   *       because base_convert() loses precision on large numbers and
   *       pack('H*') pads an odd number of hex digits on the wrong side.
   *
   * @param mixed $bin <p>String consisting of 1|0.</p>
   *
   * @return string <p>The decoded bytes, an empty string for empty or non-string input.</p>
   */
  public static function binary_to_str($bin)
  {
    // only non-empty strings can be decoded (an integer never passed the old "isset($bin[0])" check either)
    if (!is_string($bin) || $bin === '') {
      return '';
    }

    // keep the binary digits only and drop the leading zeros of the number
    $bits = ltrim((string)preg_replace('/[^01]/', '', $bin), '0');

    // the number zero is still one (NUL) byte
    if ($bits === '') {
      return "\x00";
    }

    // left-pad to full bytes, so that the numeric value stays the same
    $missingBits = (8 - strlen($bits) % 8) % 8;
    if ($missingBits > 0) {
      $bits = str_repeat('0', $missingBits) . $bits;
    }

    $str = '';
    foreach (str_split($bits, 8) as $octet) {
      $str .= chr((int)bindec($octet));
    }

    return $str;
  }
874 1
875
  /**
   * Get the Byte Order Mark of UTF-8 (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
886 2
887
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string to run the callback on.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
901
902
  /**
   * Detect once which UTF-8 related extensions / features this server offers
   * and remember the result in UTF8::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection runs only once per request
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // the ini-value is only inspected if the constant exists (short-circuit)
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the list of transliterator ids stays empty without "intl" support
    $canListTransliterators = (
        self::$SUPPORT['intl'] === true
        &&
        function_exists('transliterator_list_ids') === true
    );
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? transliterator_list_ids() : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
947 1
948 1
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default encoding "UTF-8" the call is handed over to \IntlChar::chr()
   * when "IntlChar" is available. Otherwise the bytes are built by hand, cached
   * per code point + encoding and (for other encodings) converted via
   * mb_convert_encoding().
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } elseif (self::$SUPPORT['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    $i = (int)$code_point;
    if ($i !== $code_point) {
      return null;
    }

    // a negative number is no code point: without this check the native chr()
    // would fold it back into 0..255 and a single raw (invalid) byte would be returned
    if ($code_point < 0) {
      return null;
    }

    // use static cache, only if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // values above 21 bits are wrapped around (same as before)
    $code_point %= 0x200000;

    if ($code_point < 0x80) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(0xC0 | ($code_point >> 6)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    } elseif ($code_point < 0x10000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(0xE0 | ($code_point >> 12)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 6) & 0x3F)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(0xF0 | ($code_point >> 18)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 12) & 0x3F)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 6) & 0x3F)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1008
1009 2
  /**
   * Casts the given value to an integer and returns the single byte
   * produced by the native chr() for it.
   *
   * @param int $int
   *
   * @return string
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1018
1019
  /**
   * Runs a callback on every character of a string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1033
1034
  /**
   * Returns the byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // measure every split character in bytes ("8BIT")
    $byteLength = function ($character) {
      return UTF8::strlen($character, '8BIT');
    };

    return array_map($byteLength, self::split($str));
  }
1061
1062
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character; the
   * payload bits of the lead byte and of the following bytes are then
   * combined into one integer.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decimal code, 0 for an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // an empty string has no lead byte: return 0 instead of reading an
    // undefined string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // a truncated sequence contributes zero bits for every missing byte,
      // again without touching an undefined string offset
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1101 5
1102 5
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional]
   *
   * @return string <p>The code point encoded as U+xxxx</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // the entity "&#0;" is looked up as an empty string
    if ($char === '&#0;') {
      $char = '';
    }

    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
1124
1125
  /**
   * alias for "UTF8::chr_to_decimal()"
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1138
1139
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character(s).
   *
   * INFO: the chunks are joined via implode(), so "$end" is placed between the chunks.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1152 5
1153
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // only the first capture group (well-formed sequences) survives the replacement
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = preg_replace($utf8Pattern, '$1', $str);

    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // optional steps, applied in a fixed order
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1201
1202
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors and then runs UTF8::clean() on it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() is called with:
    // - remove BOM                          => true
    // - normalize whitespace chars          => true
    // - normalize MS Word chars             => false
    // - keep non-breaking-spaces            => true
    // it also removes non UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    return (string)self::clean($fixed, true, true, false, true);
  }
1229 11
1230
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split into its characters first
    $chars = is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = array_map(array(__CLASS__, 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    // convert the integer code points via UTF8::int_to_hex()
    return array_map(array(__CLASS__, 'int_to_hex'), $codePoints);
  }
1267 2
1268 2
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1281
1282
  /**
   * Converts an int-value into a UTF-8 character by decoding the
   * numeric HTML entity "&#<int>;".
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1299
1300
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do without input or without a target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // no usable detection result => keep the input untouched
    if (!$detected) {
      return $str;
    }

    // not forced and already in the requested encoding => keep the input untouched
    if ($force !== true && $detected === $encoding) {
      return $str;
    }

    if ($encoding === 'UTF-8') {
      if (
          $force === true
          ||
          in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
      ) {
        return self::to_utf8($str);
      }
    } elseif ($encoding === 'ISO-8859-1') {
      if (
          $force === true
          ||
          in_array($detected, array('ISO-8859-1', 'UTF-8'), true)
      ) {
        return self::to_iso8859($str);
      }
    }

    // warn when the generic conversion below has to run without mbstring
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // fall back to the input when the conversion produced nothing usable
    return $converted ? $converted : $str;
  }
1390 1
1391 1
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>Name of the file to read.</p>
   * @param int|false     $flags         [optional] <p>
   *                                     Passed on to the native file_get_contents(); an empty value
   *                                     is replaced by false.<br />
   *                                     Flags described by the PHP manual:<br />
   *                                     FILE_USE_INCLUDE_PATH: Search for filename in the include
   *                                     directory. See include_path for more information.<br />
   *                                     FILE_TEXT: This flag cannot be used with FILE_BINARY.<br />
   *                                     FILE_BINARY: With this flag, the file is read in binary mode.
   *                                     This is the default setting and cannot be used with FILE_TEXT.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with stream_context_create.
   *                                     If it is null and $timeout is not 0, a context with the
   *                                     "http" => "timeout" option is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts, null is treated as 0.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // build a default stream context that carries the timeout
    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    $flags = $flags ? $flags : false;
    $offset = $offset === null ? 0 : $offset;

    // only hand over "$maxlen" when it is a real integer
    $data = is_int($maxlen) === true
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    $data = self::encode('UTF-8', $data, false);

    return self::cleanup($data);
  }
1516
1517
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // a failed read yields false: there is nothing that could carry a BOM
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1528 1
1529
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively, strings are normalized,
   * every other type is returned unchanged.
   *
   * @param mixed  $var
   * @param int    $normalization_form
   * @param string $leading_combining
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker, so that the isset() check below passes
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      if (isset($n[0])) {
        $var = $n;
      } else {
        // normalization gave no usable result: re-encode the string instead
        $var = self::encode('UTF-8', $var);
      }
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($n[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1592
1593 7
  /**
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded when the caller really passed it
    $var = func_num_args() < 4
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1633
1634 1
  /**
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the
   *                          filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails.
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // "$definition" and "$add_empty" are only forwarded when the caller passed more than "$type"
    $input = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($input);
  }
1681
1682
  /**
1683
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1684
   *
1685
   * Filters a variable with a specified filter
1686
   *
1687
   * @link  http://php.net/manual/en/function.filter-var.php
1688
   *
1689
   * @param mixed $variable <p>
1690
   *                        Value to filter.
1691
   *                        </p>
1692 1
   * @param int   $filter   [optional] <p>
1693 1
   *                        The ID of the filter to apply. The
1694
   *                        manual page lists the available filters.
1695
   *                        </p>
1696
   * @param mixed $options  [optional] <p>
1697
   *                        Associative array of options or bitwise disjunction of flags. If filter
1698
   *                        accepts options, flags can be provided in "flags" field of array. For
1699
   *                        the "callback" filter, callable type should be passed. The
1700
   *                        callback must accept one argument, the value to be filtered, and return
1701
   *                        the value after filtering/sanitizing it.
1702
   *                        </p>
1703
   *                        <p>
1704
   *                        <code>
1705
   *                        // for filters that accept options, use this format
1706
   *                        $options = array(
1707
   *                        'options' => array(
1708
   *                        'default' => 3, // value to return if the filter fails
1709
   *                        // other options here
1710
   *                        'min_range' => 0
1711
   *                        ),
1712
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1713
   *                        );
1714
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1715
   *                        // for filter that only accept flags, you can pass them directly
1716
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1717
   *                        // for filter that only accept flags, you can also pass as an array
1718
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1719
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1720
   *                        // callback validate filter
1721
   *                        function foo($value)
1722
   *                        {
1723
   *                        // Expected format: Surname, GivenNames
1724
   *                        if (strpos($value, ", ") === false) return false;
1725
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1726
   *                        $empty = (empty($surname) || empty($givennames));
1727
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1728
   *                        if ($empty || $notstrings) {
1729
   *                        return false;
1730
   *                        } else {
1731
   *                        return $value;
1732
   *                        }
1733
   *                        }
1734
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1735
   *                        </code>
1736
   *                        </p>
1737
   *
1738
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1739
   * @since 5.2.0
1740
   */
1741 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller really passed a third argument;
    // otherwise the native function is called with two arguments.
    $optionsGiven = func_num_args() >= 3;

    $filtered = $optionsGiven
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // hand the native result to the class' own filter() before returning it
    return self::filter($filtered);
  }
1751
1752 1
  /**
1753
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1754 1
   *
1755 1
   * Gets multiple variables and optionally filters them
1756
   *
1757 1
   * @link  http://php.net/manual/en/function.filter-var-array.php
1758
   *
1759
   * @param array $data       <p>
1760
   *                          An array with string keys containing the data to filter.
1761
   *                          </p>
1762
   * @param mixed $definition [optional] <p>
1763
   *                          An array defining the arguments. A valid key is a string
1764
   *                          containing a variable name and a valid value is either a
1765
   *                          filter type, or an
1766
   *                          array optionally specifying the filter, flags and options.
1767
   *                          If the value is an array, valid keys are filter
1768
   *                          which specifies the filter type,
1769
   *                          flags which specifies any flags that apply to the
1770
   *                          filter, and options which specifies any options that
1771
   *                          apply to the filter. See the example below for a better understanding.
1772 1
   *                          </p>
1773
   *                          <p>
1774 1
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1775
   *                          input array are filtered by this filter.
1776
   *                          </p>
1777
   * @param bool  $add_empty  [optional] <p>
1778
   *                          Add missing keys as <b>NULL</b> to the return value.
1779
   *                          </p>
1780
   *
1781
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1782
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1783
   * the variable is not set.
1784
   * @since 5.2.0
1785
   */
1786 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With only $data supplied, the native function is called without a
    // definition; otherwise definition and $add_empty are forwarded.
    if (func_num_args() >= 2) {
      $result = filter_var_array($data, $definition, $add_empty);
    } else {
      $result = filter_var_array($data);
    }

    // hand the native result to the class' own filter() before returning it
    return self::filter($result);
  }
1796 1
1797
  /**
1798
   * Check if the number of unicode characters is not more than the specified integer.
1799
   *
1800
   * @param string $str      The original string to be checked.
1801
   * @param int    $box_size The size in number of chars to be checked against string.
1802
   *
1803
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1804
   */
1805
  public static function fits_inside($str, $box_size)
  {
    // length as reported by the class' own strlen()
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1809
1810 1
  /**
1811
   * Try to fix simple broken UTF-8 strings.
1812 1
   *
1813
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1814
   *
1815
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1816
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1817
   * See: http://en.wikipedia.org/wiki/Windows-1252
1818
   *
1819
   * @param string $str <p>The input string</p>
1820
   *
1821
   * @return string
1822
   */
1823 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    // nothing to repair in an empty string
    if ($str === '') {
      return '';
    }

    // The search / replace lists are built from the fix-map on the first call
    // and reused by every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1842 2
1843
  /**
1844 2
   * Fix a double (or multiple) encoded UTF8 string.
1845
   *
1846
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1847
   *
1848
   * @return mixed
1849
   */
1850
  public static function fix_utf8($str)
  {
    // arrays are handled element by element (recursively), keys are kept
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // decode and re-encode until a pass no longer changes the value
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1874
1875
  /**
1876
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1877
   *
1878
   * @param string $char
1879
   *
1880
   * @return string <p>'RTL' or 'LTR'</p>
1881
   */
1882
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      // numeric direction values from the "IntlChar"-Class
      $ltrClasses = array(0, 11, 12, 20);
      $rtlClasses = array(1, 13, 14, 15, 21);

      $directionClass = \IntlChar::charDirection($char);

      if (in_array($directionClass, $ltrClasses, true)) {
        return 'LTR';
      }

      if (in_array($directionClass, $rtlClasses, true)) {
        return 'RTL';
      }

      // any other value: fall through to the code point tables below
    }

    $c = static::chr_to_decimal($char);

    // everything outside of the covered code point window is treated as LTR
    if (!($c >= 0x5be && $c <= 0x10b7f)) {
      return 'LTR';
    }

    // Pick the table that belongs to the code point:
    // $singles = individual RTL code points, $ranges = inclusive [from, to] pairs.
    $singles = array();
    $ranges = array();

    if ($c <= 0x85e) {

      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif ($c === 0x200f) {

      return 'RTL';

    } elseif ($c >= 0xfb1d) {

      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    }

    // strict match against the individual code points
    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    // inclusive range match
    foreach ($ranges as $range) {
      if ($c >= $range[0] && $c <= $range[1]) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1996
1997
  /**
1998
   * Load data from "/data/*.php".
1999
   *
2000
   * @param string $file
2001
   *
2002
   * @return bool|string|array|int <p>Will return false on error.</p>
2003
   */
2004
  private static function getData($file)
  {
    // resolve the name to "<this directory>/data/<name>.php"
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // the required file's return value is passed straight through
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2014
2015
  /**
2016
   * Check for php-support.
2017
   *
2018
   * @param string|null $key
2019
   *
2020
   * @return bool[]|bool|null return the full support-array, if $key === null<br />
2021
   *                          return bool-value, if $key is used and available<br />
2022
   *                          otherwise return null
2023
   */
2024
  public static function getSupportInfo($key = null)
  {
    // run the support check first if it has not been recorded yet
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key requested: return the complete support-array
    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null-valued) keys yield null
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2040
2041
  /**
2042
   * alias for "UTF8::string_has_bom()"
2043
   *
2044
   * @see UTF8::string_has_bom()
2045
   *
2046
   * @param string $str
2047
   *
2048
   * @return bool
2049
   *
2050
   * @deprecated
2051
   */
2052
  public static function hasBom($str)
  {
    // deprecated alias: delegates unchanged to string_has_bom()
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2056
2057
  /**
2058
   * Converts a hexadecimal-value into an UTF-8 character.
2059
   *
2060
   * @param string $hexdec <p>The hexadecimal value.</p>
2061
   *
2062
   * @return string|false <p>One single UTF-8 character.</p>
2063
   */
2064
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> decimal code point -> character
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2068
2069
  /**
2070
   * Converts hexadecimal U+xxxx code point representation to integer.
2071
   *
2072
   * INFO: opposite to UTF8::int_to_hex()
2073
   *
2074
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
2075
   *
2076
   * @return int|false <p>The code point, or false on failure.</p>
2077
   */
2078
  public static function hex_to_int($hexdec)
  {
    $hexdec = (string)$hexdec;

    // an empty value can never be a code point representation
    if (!isset($hexdec[0])) {
      return false;
    }

    // Accept an optional "\u" or "U+" prefix followed by 4-6 hexadecimal digits.
    // The class is limited to [0-9a-f] (case-insensitive): a wider class would let
    // input such as "zzzz" match and intval() would then turn it into 0 instead of
    // the value being reported as a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2092
2093
  /**
2094 2
   * alias for "UTF8::html_entity_decode()"
2095
   *
2096 2
   * @see UTF8::html_entity_decode()
2097 1
   *
2098 1
   * @param string $str
2099
   * @param int    $flags
2100 2
   * @param string $encoding
2101
   *
2102 2
   * @return string
2103 1
   */
2104
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // alias: all arguments are passed on unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2108 2
2109 2
  /**
2110 2
   * Converts a UTF-8 string to a series of HTML numbered entities.
2111 1
   *
2112
   * INFO: opposite to UTF8::html_decode()
2113 1
   *
2114 1
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2115 1
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2116 1
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2117 1
   *
2118 2
   * @return string <p>HTML numbered entities.</p>
2119
   */
2120 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded; 0x80 leaves the ASCII range untouched
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map is a list of groups of exactly four integers:
      // start code, end code, offset, mask. It must not carry a trailing fifth
      // value, because PHP >= 8.0 rejects a map whose size is not a multiple
      // of four with a ValueError.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without mbstring: encode the string character by character
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2158
2159
  /**
2160
   * UTF-8 version of html_entity_decode()
2161
   *
2162
   * The reason we are not using html_entity_decode() by itself is because
2163
   * while it is not technically correct to leave out the semicolon
2164
   * at the end of an entity most browsers will still interpret the entity
2165
   * correctly. html_entity_decode() does not convert entities without
2166
   * semicolons, so we are left with our own little solution here. Bummer.
2167
   *
2168
   * Convert all HTML entities to their applicable characters
2169
   *
2170
   * INFO: opposite to UTF8::html_encode()
2171
   *
2172
   * @link http://php.net/manual/en/function.html-entity-decode.php
2173
   *
2174
   * @param string $str      <p>
2175
   *                         The input string.
2176
   *                         </p>
2177
   * @param int    $flags    [optional] <p>
2178
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2179
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2180
   *                         <table>
2181
   *                         Available <i>flags</i> constants
2182
   *                         <tr valign="top">
2183
   *                         <td>Constant Name</td>
2184
   *                         <td>Description</td>
2185
   *                         </tr>
2186
   *                         <tr valign="top">
2187
   *                         <td><b>ENT_COMPAT</b></td>
2188
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2189
   *                         </tr>
2190
   *                         <tr valign="top">
2191
   *                         <td><b>ENT_QUOTES</b></td>
2192
   *                         <td>Will convert both double and single quotes.</td>
2193
   *                         </tr>
2194
   *                         <tr valign="top">
2195
   *                         <td><b>ENT_NOQUOTES</b></td>
2196
   *                         <td>Will leave both double and single quotes unconverted.</td>
2197
   *                         </tr>
2198
   *                         <tr valign="top">
2199
   *                         <td><b>ENT_HTML401</b></td>
2200
   *                         <td>
2201
   *                         Handle code as HTML 4.01.
2202
   *                         </td>
2203
   *                         </tr>
2204
   *                         <tr valign="top">
2205
   *                         <td><b>ENT_XML1</b></td>
2206
   *                         <td>
2207
   *                         Handle code as XML 1.
2208
   *                         </td>
2209
   *                         </tr>
2210
   *                         <tr valign="top">
2211
   *                         <td><b>ENT_XHTML</b></td>
2212
   *                         <td>
2213
   *                         Handle code as XHTML.
2214
   *                         </td>
2215
   *                         </tr>
2216
   *                         <tr valign="top">
2217
   *                         <td><b>ENT_HTML5</b></td>
2218
   *                         <td>
2219
   *                         Handle code as HTML 5.
2220
   *                         </td>
2221
   *                         </tr>
2222
   *                         </table>
2223
   *                         </p>
2224
   * @param string $encoding [optional] <p>Encoding to use.</p>
2225
   *
2226
   * @return string <p>The decoded string.</p>
2227
   */
2228
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // fewer than four bytes is returned untouched, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // without an ampersand there is nothing to decode
    if (strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity start nor any semicolon: nothing to decode either
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES;

      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      }
    }

    // repeat until a full pass leaves the string unchanged
    do {
      $before = $str;

      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $converted = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            // a result that is a plain quote character is not applied here,
            // the entity is kept as it was matched
            if ($converted === '"' || $converted === "'") {
              return $matches[0];
            }

            return $converted;
          },
          $str
      );

      // append the missing semicolon to numeric entities, so that they can be decoded
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // decode numeric & UTF16 two byte entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2293
2294
  /**
2295
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2296
   *
2297
   * @link http://php.net/manual/en/function.htmlentities.php
2298
   *
2299
   * @param string $str           <p>
2300
   *                              The input string.
2301
   *                              </p>
2302
   * @param int    $flags         [optional] <p>
2303 2
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2304
   *                              invalid code unit sequences and the used document type. The default is
2305 2
   *                              ENT_COMPAT | ENT_HTML401.
2306
   *                              <table>
2307
   *                              Available <i>flags</i> constants
2308
   *                              <tr valign="top">
2309
   *                              <td>Constant Name</td>
2310
   *                              <td>Description</td>
2311
   *                              </tr>
2312
   *                              <tr valign="top">
2313
   *                              <td><b>ENT_COMPAT</b></td>
2314
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2315
   *                              </tr>
2316
   *                              <tr valign="top">
2317 1
   *                              <td><b>ENT_QUOTES</b></td>
2318
   *                              <td>Will convert both double and single quotes.</td>
2319 1
   *                              </tr>
2320
   *                              <tr valign="top">
2321
   *                              <td><b>ENT_NOQUOTES</b></td>
2322
   *                              <td>Will leave both double and single quotes unconverted.</td>
2323
   *                              </tr>
2324
   *                              <tr valign="top">
2325
   *                              <td><b>ENT_IGNORE</b></td>
2326
   *                              <td>
2327
   *                              Silently discard invalid code unit sequences instead of returning
2328
   *                              an empty string. Using this flag is discouraged as it
2329
   *                              may have security implications.
2330
   *                              </td>
2331
   *                              </tr>
2332
   *                              <tr valign="top">
2333
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2334
   *                              <td>
2335
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2336
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2337
   *                              </td>
2338
   *                              </tr>
2339
   *                              <tr valign="top">
2340
   *                              <td><b>ENT_DISALLOWED</b></td>
2341
   *                              <td>
2342
   *                              Replace invalid code points for the given document type with a
2343
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2344
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2345
   *                              instance, to ensure the well-formedness of XML documents with
2346
   *                              embedded external content.
2347
   *                              </td>
2348
   *                              </tr>
2349
   *                              <tr valign="top">
2350
   *                              <td><b>ENT_HTML401</b></td>
2351
   *                              <td>
2352
   *                              Handle code as HTML 4.01.
2353
   *                              </td>
2354
   *                              </tr>
2355
   *                              <tr valign="top">
2356
   *                              <td><b>ENT_XML1</b></td>
2357
   *                              <td>
2358
   *                              Handle code as XML 1.
2359 1
   *                              </td>
2360
   *                              </tr>
2361 1
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_XHTML</b></td>
2363
   *                              <td>
2364
   *                              Handle code as XHTML.
2365
   *                              </td>
2366
   *                              </tr>
2367
   *                              <tr valign="top">
2368
   *                              <td><b>ENT_HTML5</b></td>
2369
   *                              <td>
2370
   *                              Handle code as HTML 5.
2371
   *                              </td>
2372
   *                              </tr>
2373
   *                              </table>
2374
   *                              </p>
2375
   * @param string $encoding      [optional] <p>
2376
   *                              Like <b>htmlspecialchars</b>,
2377
   *                              <b>htmlentities</b> takes an optional third argument
2378
   *                              <i>encoding</i> which defines encoding used in
2379
   *                              conversion.
2380
   *                              Although this argument is technically optional, you are highly
2381
   *                              encouraged to specify the correct value for your code.
2382
   *                              </p>
2383
   * @param bool   $double_encode [optional] <p>
2384
   *                              When <i>double_encode</i> is turned off PHP will not
2385
   *                              encode existing html entities. The default is to convert everything.
2386
   *                              </p>
2387 1
   *
2388
   *
2389 1
   * @return string the encoded string.
2390
   * </p>
2391
   * <p>
2392
   * If the input <i>string</i> contains an invalid code unit
2393
   * sequence within the given <i>encoding</i> an empty string
2394
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2395
   * <b>ENT_SUBSTITUTE</b> flags are set.
2396
   */
2397
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character whose size is three bytes or more and
    // replace it with the result of html_encode().
    $search = array();
    $replacements = array();

    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if (!($byteLength >= 3)) {
        continue;
      }

      $char = self::access($str, $position);

      // each character is encoded only once
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2425
2426
  /**
2427
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2428
   *
2429
   * INFO: Take a look at "UTF8::htmlentities()"
2430
   *
2431 28
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2432
   *
2433 28
   * @param string $str           <p>
2434
   *                              The string being converted.
2435 28
   *                              </p>
2436 5
   * @param int    $flags         [optional] <p>
2437
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2438
   *                              invalid code unit sequences and the used document type. The default is
2439 28
   *                              ENT_COMPAT | ENT_HTML401.
2440
   *                              <table>
2441
   *                              Available <i>flags</i> constants
2442
   *                              <tr valign="top">
2443
   *                              <td>Constant Name</td>
2444
   *                              <td>Description</td>
2445
   *                              </tr>
2446
   *                              <tr valign="top">
2447
   *                              <td><b>ENT_COMPAT</b></td>
2448
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2449 1
   *                              </tr>
2450
   *                              <tr valign="top">
2451 1
   *                              <td><b>ENT_QUOTES</b></td>
2452
   *                              <td>Will convert both double and single quotes.</td>
2453 1
   *                              </tr>
2454 1
   *                              <tr valign="top">
2455
   *                              <td><b>ENT_NOQUOTES</b></td>
2456
   *                              <td>Will leave both double and single quotes unconverted.</td>
2457 1
   *                              </tr>
2458 1
   *                              <tr valign="top">
2459
   *                              <td><b>ENT_IGNORE</b></td>
2460 1
   *                              <td>
2461
   *                              Silently discard invalid code unit sequences instead of returning
2462
   *                              an empty string. Using this flag is discouraged as it
2463
   *                              may have security implications.
2464
   *                              </td>
2465
   *                              </tr>
2466
   *                              <tr valign="top">
2467
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2468
   *                              <td>
2469
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2470
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2471 16
   *                              </td>
2472
   *                              </tr>
2473
   *                              <tr valign="top">
2474 16
   *                              <td><b>ENT_DISALLOWED</b></td>
2475
   *                              <td>
2476
   *                              Replace invalid code points for the given document type with a
2477 16
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2478
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2479 16
   *                              instance, to ensure the well-formedness of XML documents with
2480 16
   *                              embedded external content.
2481 15
   *                              </td>
2482 16
   *                              </tr>
2483 6
   *                              <tr valign="top">
2484
   *                              <td><b>ENT_HTML401</b></td>
2485 15
   *                              <td>
2486
   *                              Handle code as HTML 4.01.
2487
   *                              </td>
2488
   *                              </tr>
2489
   *                              <tr valign="top">
2490
   *                              <td><b>ENT_XML1</b></td>
2491
   *                              <td>
2492
   *                              Handle code as XML 1.
2493
   *                              </td>
2494
   *                              </tr>
2495
   *                              <tr valign="top">
2496
   *                              <td><b>ENT_XHTML</b></td>
2497
   *                              <td>
2498
   *                              Handle code as XHTML.
2499
   *                              </td>
2500
   *                              </tr>
2501
   *                              <tr valign="top">
2502
   *                              <td><b>ENT_HTML5</b></td>
2503
   *                              <td>
2504
   *                              Handle code as HTML 5.
2505
   *                              </td>
2506
   *                              </tr>
2507
   *                              </table>
2508
   *                              </p>
2509
   * @param string $encoding      [optional] <p>
2510
   *                              Defines encoding used in conversion.
2511
   *                              </p>
2512
   *                              <p>
2513
   *                              For the purposes of this function, the encodings
2514
   *                              ISO-8859-1, ISO-8859-15,
2515
   *                              UTF-8, cp866,
2516
   *                              cp1251, cp1252, and
2517
   *                              KOI8-R are effectively equivalent, provided the
2518
   *                              <i>string</i> itself is valid for the encoding, as
2519
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2520
   *                              the same positions in all of these encodings.
2521
   *                              </p>
2522
   * @param bool   $double_encode [optional] <p>
2523
   *                              When <i>double_encode</i> is turned off PHP will not
2524
   *                              encode existing html entities, the default is to convert everything.
2525
   *                              </p>
2526
   *
2527
   * @return string The converted string.
2528
   * </p>
2529
   * <p>
2530
   * If the input <i>string</i> contains an invalid code unit
2531
   * sequence within the given <i>encoding</i> an empty string
2532
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2533
   * <b>ENT_SUBSTITUTE</b> flags are set.
2534
   */
2535
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The literal "UTF-8" is used directly; every other value goes through normalize_encoding().
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2543
2544
  /**
2545 1
   * Checks whether iconv is available on the server.
2546
   *
2547 1
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2548
   */
2549 1
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a strict boolean.
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // "iconv_set_encoding" is deprecated since PHP >= 5.6, so it is only used when
    // Bootup::is_php('5.6') reports false. The function_exists() guard prevents a fatal
    // "undefined function" error when neither the extension nor a polyfill provides it.
    if (
        Bootup::is_php('5.6') === false
        &&
        function_exists('iconv_set_encoding') === true
    ) {
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2564
2565 1
  /**
2566
   * alias for "UTF8::decimal_to_chr()"
2567 1
   *
2568
   * @see UTF8::decimal_to_chr()
2569
   *
2570
   * @param mixed $int
2571
   *
2572 1
   * @return string
2573 1
   */
2574 1
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2578 1
2579
  /**
2580
   * Converts Integer to hexadecimal U+xxxx code point representation.
2581
   *
2582
   * INFO: opposite to UTF8::hex_to_int()
2583
   *
2584
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2585
   * @param string $pfix [optional]
2586
   *
2587
   * @return string <p>The code point, or empty string on failure.</p>
2588
   */
2589
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only genuine integers are converted; anything else yields an empty string.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad the hexadecimal digits with zeros to a minimum width of four.
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2601 4
2602 4
  /**
2603 4
   * Checks whether intl-char is available on the server.
2604 4
   *
2605 4
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2606 4
   */
2607 4
  public static function intlChar_loaded()
  {
    // The class lookup is only attempted when Bootup::is_php('7.0') reports true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2615 4
2616 4
  /**
2617 4
   * Checks whether intl is available on the server.
2618 4
   *
2619 4
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2620 4
   */
2621 4
  public static function intl_loaded()
  {
    // extension_loaded() already returns a strict boolean.
    $loaded = extension_loaded('intl');

    return $loaded;
  }
2625 4
2626 4
  /**
2627 4
   * alias for "UTF8::is_ascii()"
2628
   *
2629 4
   * @see UTF8::is_ascii()
2630 3
   *
2631 2
   * @param string $str
2632
   *
2633 3
   * @return boolean
2634
   *
2635
   * @deprecated
2636
   */
2637 3
  public static function isAscii($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2641
2642
  /**
2643
   * alias for "UTF8::is_base64()"
2644
   *
2645
   * @see UTF8::is_base64()
2646
   *
2647
   * @param string $str
2648
   *
2649
   * @return bool
2650
   *
2651
   * @deprecated
2652
   */
2653 3
  public static function isBase64($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2657 3
2658
  /**
2659 3
   * alias for "UTF8::is_binary()"
2660 3
   *
2661 3
   * @see UTF8::is_binary()
2662 3
   *
2663 3
   * @param string $str
2664 3
   *
2665 3
   * @return bool
2666 3
   *
2667 3
   * @deprecated
2668 1
   */
2669 1
  public static function isBinary($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2673
2674 3
  /**
2675 3
   * alias for "UTF8::is_bom()"
2676 3
   *
2677 3
   * @see UTF8::is_bom()
2678 3
   *
2679 3
   * @param string $utf8_chr
2680 3
   *
2681 3
   * @return boolean
2682 3
   *
2683 1
   * @deprecated
2684 1
   */
2685 3
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias: forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2689 3
2690 1
  /**
2691 1
   * alias for "UTF8::is_html()"
2692
   *
2693 1
   * @see UTF8::is_html()
2694
   *
2695
   * @param string $str
2696
   *
2697 3
   * @return boolean
2698
   *
2699 3
   * @deprecated
2700
   */
2701
  public static function isHtml($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2705
2706
  /**
2707
   * alias for "UTF8::is_json()"
2708
   *
2709
   * @see UTF8::is_json()
2710
   *
2711
   * @param string $str
2712 43
   *
2713
   * @return bool
2714 43
   *
2715
   * @deprecated
2716 43
   */
2717 3
  public static function isJson($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2721 1
2722 1
  /**
2723
   * alias for "UTF8::is_utf16()"
2724
   *
2725
   * @see UTF8::is_utf16()
2726
   *
2727
   * @param string $str
2728
   *
2729
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2730 41
   *
2731
   * @deprecated
2732
   */
2733
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2737
2738
  /**
2739
   * alias for "UTF8::is_utf32()"
2740 41
   *
2741
   * @see UTF8::is_utf32()
2742 41
   *
2743 41
   * @param string $str
2744 41
   *
2745
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2746
   *
2747 41
   * @deprecated
2748 41
   */
2749 41
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2753
2754 36
  /**
2755 41
   * alias for "UTF8::is_utf8()"
2756
   *
2757 34
   * @see UTF8::is_utf8()
2758 34
   *
2759 34
   * @param string $str
2760 34
   * @param bool   $strict
2761 39
   *
2762
   * @return bool
2763 21
   *
2764 21
   * @deprecated
2765 21
   */
2766 21
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias: forwards both arguments unchanged to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2770 9
2771 9
  /**
2772 9
   * Checks if a string is 7 bit ASCII.
2773 16
   *
2774
   * @param string $str <p>The string to check.</p>
2775
   *
2776
   * @return bool <p>
2777
   *              <strong>true</strong> if it is ASCII<br />
2778
   *              <strong>false</strong> otherwise
2779
   *              </p>
2780
   */
2781
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string is treated as ASCII.
    if ($str === '') {
      return true;
    }

    // ASCII means: no byte in the range 0x80-0xFF occurs anywhere in the string.
    return preg_match('/[\x80-\xFF]/', $str) !== 1;
  }
2791 3
2792 3
  /**
2793
   * Returns true if the string is base64 encoded, false otherwise.
2794
   *
2795
   * @param string $str <p>The input string.</p>
2796 5
   *
2797
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2798 41
   */
2799
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never considered base64 encoded.
    if ($str === '') {
      return false;
    }

    // Strict mode makes base64_decode() return false for characters outside the alphabet.
    $decoded = base64_decode($str, true);

    // Compare against false explicitly: a truthiness test would wrongly reject valid
    // input whose decoded payload is the string "0" (e.g. "MA==").
    if ($decoded === false) {
      return false;
    }

    // Only accept input that is exactly the canonical encoding of its decoded payload.
    return base64_encode($decoded) === $str;
  }
2814
2815
  /**
2816
   * Check if the input is binary... (this looks like a hack).
2817 33
   *
2818 33
   * @param mixed $input
2819 33
   *
2820 33
   * @return bool
2821
   */
2822 33
  public static function is_binary($input)
  {
    $input = (string)$input;

    // The empty string is not binary.
    if ($input === '') {
      return false;
    }

    // A string made up solely of the characters "0" and "1" counts as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30% NUL bytes"
    // heuristic was subsumed by this check (a positive ratio implies at least one
    // NUL byte), so a single scan yields the same result.
    return strpos($input, "\x00") !== false;
  }
2845
2846
  /**
2847
   * Check if the file is binary.
2848
   *
2849
   * @param string $file
2850
   *
2851
   * @return boolean
2852
   */
2853
  public static function is_binary_file($file)
  {
    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false rather than throwing; passing that
      // value on to fread()/fclose() would raise further warnings (or a TypeError on PHP 8).
      if ($fp === false) {
        return false;
      }

      // Only the first 512 bytes are inspected.
      $block = fread($fp, 512);
      fclose($fp);
    } catch (\Exception $e) {
      // Reached only if an error handler converts warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block);
  }
2865
2866
  /**
2867
   * Checks if the given string is equal to any "Byte Order Mark".
2868
   *
2869
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2870
   *
2871
   * @param string $str <p>The input string.</p>
2872
   *
2873
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2874
   */
2875
  public static function is_bom($str)
  {
    // The keys of self::$BOM are the candidate values; the input must be strictly
    // identical to one of them.
    $bomStrings = array_keys(self::$BOM);

    return in_array($str, $bomStrings, true);
  }
2885 2
2886
  /**
2887 2
   * Check if the string contains any html-tags <lall>.
2888 2
   *
2889 2
   * @param string $str <p>The input string.</p>
2890
   *
2891
   * @return boolean
2892
   */
2893 2
  public static function is_html($str)
  {
    $str = (string)$str;

    // The empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // Matches an opening or closing tag, optionally with attributes and a self-closing slash.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    $found = array();
    preg_match($tagPattern, $str, $found);

    return count($found) > 0;
  }
2912
2913
  /**
2914
   * Try to check if "$str" is a JSON string.
2915
   *
2916
   * @param string $str <p>The input string.</p>
2917
   *
2918
   * @return bool
2919
   */
2920
  public static function is_json($str)
  {
    $str = (string)$str;

    // The empty string is not JSON.
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; scalar results are rejected.
    $isContainer = is_object($decoded) || is_array($decoded);

    return $isContainer && json_last_error() === JSON_ERROR_NONE;
  }
2944
2945
  /**
2946
   * Check if the string is UTF-16.
2947
   *
2948
   * @param string $str <p>The input string.</p>
2949
   *
2950
   * @return int|false <p>
2951
   *                   <strong>false</strong> if it's not UTF-16,<br />
2952 6
   *                   <strong>1</strong> for UTF-16LE,<br />
2953
   *                   <strong>2</strong> for UTF-16BE.
2954 6
   *                   </p>
2955
   */
2956 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() classifies as binary is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    $maybeUTF16LE = self::count_round_trip_chars($str, 'UTF-16LE');
    $maybeUTF16BE = self::count_round_trip_chars($str, 'UTF-16BE');

    // Equal scores (including 0/0) give no indication of either byte order.
    if ($maybeUTF16BE === $maybeUTF16LE) {
      return false;
    }

    return ($maybeUTF16LE > $maybeUTF16BE) ? 1 : 2;
  }

  /**
   * Scores how plausible it is that "$str" is encoded in "$encoding".
   *
   * The string is converted to UTF-8, back to "$encoding" and to UTF-8 again. If the
   * first conversion is empty/falsy or the round trip is not stable the score is 0.
   * Otherwise the score is the number of keys of count_chars() for the converted
   * string that are found (strict comparison) in the array returned by count_chars()
   * for the original string.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding <p>The candidate encoding, e.g. "UTF-16LE".</p>
   *
   * @return int
   */
  private static function count_round_trip_chars($str, $encoding)
  {
    $utf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$utf8) {
      return 0;
    }

    $encodedAgain = \mb_convert_encoding($utf8, $encoding, 'UTF-8');
    $roundTrip = \mb_convert_encoding($encodedAgain, 'UTF-8', $encoding);
    if ($roundTrip !== $utf8) {
      return 0;
    }

    $strChars = self::count_chars($str, true);

    $score = 0;
    foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
      if (in_array($roundTripChar, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it's not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() classifies as binary is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    $maybeUTF32LE = self::count_round_trip_chars($str, 'UTF-32LE');
    $maybeUTF32BE = self::count_round_trip_chars($str, 'UTF-32BE');

    // Equal scores (including 0/0) give no indication of either byte order.
    if ($maybeUTF32BE === $maybeUTF32LE) {
      return false;
    }

    return ($maybeUTF32LE > $maybeUTF32BE) ? 1 : 2;
  }
3064
3065
  /**
3066
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3067
   *
3068
   * @see    http://hsivonen.iki.fi/php-utf8/
3069
   *
3070 16
   * @param string $str    <p>The string to be checked.</p>
3071
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3072 16
   *
3073
   * @return bool
3074 16
   */
3075 2
  public static function is_utf8($str, $strict = false)
  {
    // Validates $str octet by octet against the UTF-8 rules: shortest form only,
    // no surrogates, nothing above U+10FFFF, no 5/6 octet sequences.
    // With $strict === true the string is additionally rejected when
    // self::is_utf16() or self::is_utf32() recognises it.
    $str = (string)$str;

    if (!isset($str[0])) {
      // an empty string is reported as valid
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE: the former shortcut that ran preg_match('/^.{1}/us') when
    // self::pcre_utf8_support() was *not* available has been dropped: without
    // "/u" support that call can never return 1, so every non-empty string was
    // reported as invalid. The octet-level check below needs no PCRE at all.

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "mbstring.func_overload" is active
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    $remaining = 0; // continuation octets still expected for the current sequence
    $codePoint = 0; // code point assembled so far
    $seqLength = 1; // total number of octets of the current sequence

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($remaining === 0) {
        // We expect either a US-ASCII character or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $seqLength = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $codePoint = ($in & 0x1F) << 6;
          $remaining = 1;
          $seqLength = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $codePoint = ($in & 0x0F) << 12;
          $remaining = 2;
          $seqLength = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $codePoint = ($in & 0x07) << 18;
          $remaining = 3;
          $seqLength = 4;
        } else {
          // Either the first octet of a 5/6 octet sequence (always illegal:
          // not the shortest form or outside of 0-0x10FFFF), or an octet that
          // is neither US-ASCII nor a legal first octet.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only a continuation octet is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $codePoint |= ($in & 0x3F) << (($remaining - 1) * 6);

      if (0 === --$remaining) {
        // End of the sequence: check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $seqLength && $codePoint < 0x0080) ||
            (3 === $seqLength && $codePoint < 0x0800) ||
            (4 === $seqLength && $codePoint < 0x10000) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($codePoint & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($codePoint > 0x10FFFF)
        ) {
          return false;
        }

        // reset for the next character
        $codePoint = 0;
        $seqLength = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence is not valid.
    return $remaining === 0;
  }
3216
3217
  /**
3218
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3219
   * Decodes a JSON string
3220
   *
3221
   * @link http://php.net/manual/en/function.json-decode.php
3222
   *
3223
   * @param string $json    <p>
3224
   *                        The <i>json</i> string being decoded.
3225
   *                        </p>
3226
   *                        <p>
3227
   *                        This function only works with UTF-8 encoded strings.
3228
   *                        </p>
3229
   *                        <p>PHP implements a superset of
3230 17
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3231
   *                        only supports these values when they are nested inside an array or an object.
3232 17
   *                        </p>
3233 3
   * @param bool   $assoc   [optional] <p>
3234
   *                        When <b>TRUE</b>, returned objects will be converted into
3235
   *                        associative arrays.
3236 16
   *                        </p>
3237
   * @param int    $depth   [optional] <p>
3238
   *                        User specified recursion depth.
3239
   *                        </p>
3240 16
   * @param int    $options [optional] <p>
3241
   *                        Bitmask of JSON decode options. Currently only
3242
   *                        <b>JSON_BIGINT_AS_STRING</b>
3243
   *                        is supported (default is to cast large integers as floats)
3244
   *                        </p>
3245
   *
3246
   * @return mixed the value encoded in <i>json</i> in appropriate
3247
   * PHP type. Values true, false and
3248 16
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3249 16
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3250 15
   * <i>json</i> cannot be decoded or if the encoded
3251
   * data is deeper than the recursion limit.
3252
   */
3253 9 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through self::filter() and cast to string before decoding.
    $filteredJson = (string)self::filter($json);

    // The "$options" argument is only handed to the native function from PHP 5.4 on.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filteredJson, $assoc, $depth);
    }

    return json_decode($filteredJson, $assoc, $depth, $options);
  }
3265 9
3266 5
  /**
3267
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3268
   * Returns the JSON representation of a value.
3269 9
   *
3270
   * @link http://php.net/manual/en/function.json-encode.php
3271
   *
3272
   * @param mixed $value   <p>
3273
   *                       The <i>value</i> being encoded. Can be any type except
3274
   *                       a resource.
3275
   *                       </p>
3276
   *                       <p>
3277
   *                       All string data must be UTF-8 encoded.
3278
   *                       </p>
3279
   *                       <p>PHP implements a superset of
3280
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3281
   *                       only supports these values when they are nested inside an array or an object.
3282
   *                       </p>
3283
   * @param int   $options [optional] <p>
3284
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3285 1
   *                       <b>JSON_HEX_TAG</b>,
3286
   *                       <b>JSON_HEX_AMP</b>,
3287
   *                       <b>JSON_HEX_APOS</b>,
3288 1
   *                       <b>JSON_NUMERIC_CHECK</b>,
3289
   *                       <b>JSON_PRETTY_PRINT</b>,
3290 1
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3291 1
   *                       <b>JSON_FORCE_OBJECT</b>,
3292 1
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3293
   *                       constants is described on
3294
   *                       the JSON constants page.
3295 1
   *                       </p>
3296
   * @param int   $depth   [optional] <p>
3297
   *                       Set the maximum depth. Must be greater than zero.
3298
   *                       </p>
3299
   *
3300
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3301
   */
3302 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is passed through self::filter() before encoding.
    $filteredValue = self::filter($value);

    // The "$depth" argument is only handed to the native function from PHP 5.5 on.
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filteredValue, $options);
    }

    return json_encode($filteredValue, $options, $depth);
  }
3314
3315
  /**
3316
   * Makes string's first char lowercase.
3317 1
   *
3318
   * @param string $str <p>The input string</p>
3319 1
   *
3320 1
   * @return string <p>The resulting string</p>
3321
   */
3322
  public static function lcfirst($str)
  {
    // Lower-cases the first character and leaves the rest of the string untouched.
    $str = (string)$str;

    // same empty-input guard as the other string helpers of this class
    if (!isset($str[0])) {
      return '';
    }

    // self::substr() can return false; cast so that self::strtolower() and the
    // concatenation always work on strings.
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3326
3327
  /**
3328 1
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3329
   *
3330
   * @param string $str   <p>The string to be trimmed</p>
3331 1
   * @param string $chars <p>Optional characters to be stripped</p>
3332
   *
3333
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3334
   */
3335 1 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    // Strips leading characters from $str: by default everything matching
    // [\pZ\pC] (separators and control characters), otherwise the characters
    // listed in $chars.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "$chars" counts as "not given" when it is INF or empty. The string '0' is
    // falsy in PHP but is a legitimate character list (e.g. ltrim('007', '0')),
    // so it must not fall back to the default behaviour.
    $useDefaultChars = ($chars === INF) || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3350 1
3351 1
  /**
3352 1
   * Returns the UTF-8 character with the maximum code point in the given data.
3353 1
   *
3354
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3355
   *
3356
   * @return string <p>The character with the highest code point.</p>
3357
   */
3358 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is joined first, so all of its characters are compared.
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // Native max() must not be called with an empty array (warning before
    // PHP 8, ValueError since PHP 8), so empty input yields an empty string.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3366
3367 5
  /**
3368
   * Calculates and returns the maximum number of bytes taken by any
3369
   * UTF-8 encoded character in the given string.
3370
   *
3371
   * @param string $str <p>The original Unicode string.</p>
3372
   *
3373
   * @return int <p>Max byte lengths of the given chars.</p>
3374
   */
3375
  public static function max_chr_width($str)
  {
    // per-character byte sizes as reported by self::chr_size_list()
    $sizes = self::chr_size_list($str);

    // no characters => width 0
    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3384
3385 10
  /**
3386
   * Checks whether mbstring is available on the server.
3387
   *
3388
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3389
   */
3390
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') === false) {
      return false;
    }

    // side effect: whenever mbstring is present, its internal encoding is set to UTF-8
    \mb_internal_encoding('UTF-8');

    return true;
  }
3400 1
3401
  /**
3402 1
   * Returns the UTF-8 character with the minimum code point in the given data.
3403 1
   *
3404 1
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3405 1
   *
3406 1
   * @return string <p>The character with the lowest code point.</p>
3407
   */
3408 1 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is joined first, so all of its characters are compared.
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // Native min() must not be called with an empty array (warning before
    // PHP 8, ValueError since PHP 8), so empty input yields an empty string.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3416
3417
  /**
3418
   * alias for "UTF8::normalize_encoding()"
3419
   *
3420
   * @see UTF8::normalize_encoding()
3421
   *
3422
   * @param string $encoding
3423
   * @param mixed  $fallback
3424 45
   *
3425
   * @return string
3426
   *
3427 45
   * @deprecated
3428
   */
3429
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    // deprecated alias: both arguments are forwarded unchanged
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3433 45
3434 45
  /**
3435
   * Normalize the encoding-"name" input.
3436 45
   *
3437
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3438
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3439 45
   *
3440 45
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
3441
   */
3442 45
  public static function normalize_encoding($encoding, $fallback = false)
  {
    // results are memoized per original (un-normalized) input
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    // nothing given => use the fallback
    if (!$encoding) {
      return $fallback;
    }

    // already a name we accept as-is
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // known aliases, keyed by the upper-cased name without punctuation
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upperCased = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    // unknown names are returned upper-cased
    $normalized = !empty($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upperCased;

    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3491 18
3492
  /**
3493
   * Normalize some MS Word special characters.
3494
   *
3495
   * @param string $str <p>The string to be normalized.</p>
3496
   *
3497
   * @return string
3498
   */
3499 View Code Duplication
  public static function normalize_msword($str)
  {
    // search / replace lists, built once from self::$UTF8_MSWORD
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$UTF8_MSWORD);
      $REPLACE_CACHE = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
  }
3518 17
3519 17
  /**
3520 17
   * Normalize the whitespace.
3521 2
   *
3522 2
   * @param string $str                     <p>The string to be normalized.</p>
3523
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3524
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3525 18
   *                                        bidirectional text chars.</p>
3526
   *
3527 18
   * @return string
3528 18
   */
3529 18
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // Replaces every entry of self::$WHITESPACE_TABLE with a plain space and,
    // unless $keepBidiUnicodeControls is set, removes the characters listed in
    // self::$BIDI_UNI_CODE_CONTROLS_TABLE.
    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The cache key has to be derived from the very same strict check that
    // decides whether the NO-BREAK SPACE is kept. With "(int)$keepNonBreakingSpace"
    // a truthy non-bool (e.g. 1) stored the *full* table under key 1, which was
    // then wrongly re-used by later calls with "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3565
3566
  /**
3567
   * Strip all whitespace characters. This includes tabs and newline
3568 1
   * characters, as well as multibyte whitespace such as the thin space
3569 1
   * and ideographic space.
3570 1
   *
3571 1
   * @param string $str
3572
   *
3573
   * @return string
3574 1
   */
3575
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // remove every run of characters matched by [[:space:]] (pattern uses the "u" modifier)
    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    return (string)$stripped;
  }
3586 36
3587
  /**
3588 36
   * Format a number with grouped thousands.
3589
   *
3590 36
   * @param float  $number
3591 2
   * @param int    $decimals
3592
   * @param string $dec_point
3593
   * @param string $thousands_sep
3594
   *
3595 36
   * @return string
3596 36
   *
3597
   * @deprecated Because this has nothing to do with UTF8. :/
3598 36
   */
3599
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    // Formats $number like the native number_format(), also with separators
    // that are longer than one byte.
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // Native number_format() handles multi-byte separators as of PHP 5.4; only
    // older versions need the workaround, and they need it as soon as *one* of
    // the separators is longer than one byte. (The previous condition was
    // inverted: it ran the workaround on PHP >= 5.4 and only if both separators
    // were multi-byte.)
    if (
        Bootup::is_php('5.4') !== true
        &&
        (isset($thousands_sep[1]) || isset($dec_point[1]))
    ) {
      // strtr() replaces both placeholders in a single pass, so a "," or "."
      // inside one separator is never replaced again by the other one.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3625
3626
  /**
3627
   * Calculates Unicode code point of the given UTF-8 encoded character.
3628
   *
3629
   * INFO: opposite to UTF8::chr()
3630
   *
3631
   * @param string      $chr      <p>The character of which to calculate code point.</p>
3632
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
3633
   *
3634
   * @return int <p>
3635
   *             Unicode code point of the given character,<br />
3636
   *             0 on invalid UTF-8 byte sequence.
3637
   *             </p>
3638
   */
3639
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // results of the manual decoding below are memoized per input
    static $CHAR_CACHE = array();

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // convert only if the normalized name is still something other than UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // prefer "\IntlChar" when it is flagged as available and yields a truthy result
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($chr);
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // look at (at most) the first four bytes, 1-indexed as returned by unpack()
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode)
      $codePoint = $lead;
    }

    $CHAR_CACHE[$chr] = $codePoint;

    return $codePoint;
  }
3688
3689
  /**
3690
   * Parses the string into an array (into the second parameter).
3691
   *
3692 12
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3693 3
   *          if the second parameter is not set!
3694
   *
3695
   * @link http://php.net/manual/en/function.parse-str.php
3696
   *
3697
   * @param string  $str       <p>The input string.</p>
3698
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
3699
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3700 12
   *
3701 9
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3702
   */
3703
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the parsed values are written into the by-reference "$result"
    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // success means: the parser did not fail and produced at least one value
    return $parsed !== false && !empty($result);
  }
3717 6
3718 6
  /**
3719 6
   * Checks if the /u modifier, which enables Unicode support in PCRE, is available.
3720 6
   *
3721 6
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3722 6
   */
3723 6
  public static function pcre_utf8_support()
  {
    // Probe with an empty pattern that uses the "u" modifier; the silence operator
    // hides the warning of a failing call, and the raw result is cast to bool.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3728 6
3729 6
  /**
3730 6
   * Create an array containing a range of UTF-8 characters.
3731
   *
3732 6
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3733 6
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3734 6
   *
3735
   * @return array
3736
   */
3737
  public static function range($var1, $var2)
  {
    // Builds the list of UTF-8 characters between two boundaries. An empty
    // array is returned as soon as a boundary is empty or resolves to 0.
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolves one boundary of UTF8::range() to a code point.
   *
   * Decimal digits are used as they are, other hexadecimal strings go through
   * self::hex_to_int() and everything else through self::ord().
   *
   * @param mixed $value <p>Numeric or hexadecimal code point, or a UTF-8 character.</p>
   *
   * @return int
   */
  private static function rangeBoundaryToCodePoint($value)
  {
    // ctype_*() is only fed with strings: non-string arguments are deprecated
    // as of PHP 8.1
    $valueAsString = (string)$value;

    if (ctype_digit($valueAsString)) {
      return (int)$value;
    }

    if (ctype_xdigit($valueAsString)) {
      return (int)self::hex_to_int($value);
    }

    return self::ord($value);
  }
3775
3776
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string does not change anymore).</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as hex html-entities, so that the entity-decoding below picks them up
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', rawurldecode($str));
    }

    // "ENT_HTML5" is only referenced if "Bootup::is_php('5.4')" reports true
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $previous = $str;

      $entityDecoded = self::html_entity_decode(self::to_utf8($str), $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));

      // one pass only, unless multi-decoding was requested and the last pass still changed something
    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3826 1
3827 1
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>Use "UTF8::remove_bom()" instead.</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3842
3843
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$BOM" (BOM bytes => byte length) is checked against the
   * start of the string, and a matching BOM is cut off byte-wise ("8BIT").
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM, always a string (never false).</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      if (0 === self::strpos($str, $bomString, 0, '8BIT')) {
        // "UTF8::substr()" is not guaranteed to return a string here (presumably false when
        // nothing is left after the BOM -- TODO confirm), so cast the result: otherwise a
        // non-string would be passed into the next "strpos()" call and be returned to the caller
        $str = (string)self::substr($str, $bomByteLength, null, '8BIT');

        if ($str === '') {
          // the string consisted only of BOM bytes, nothing more to strip
          return '';
        }
      }
    }

    return $str;
  }
3866
3867 2
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly consecutive repetitions of a search string is collapsed
   * into a single occurrence, e.g. "a   b" with " " => "a b".
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // The first group always captures exactly one literal occurrence of $item, so "$1" is
        // used as replacement. Passing $item itself as the replacement string would let "$0",
        // "$1" or "\1" inside $item be interpreted as back-references by preg_replace().
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3890 2
3891
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * Removed are all control characters except newline (dec 10), carriage return (dec 13)
   * and horizontal tab (dec 09). The url-encoded variants are matched case-insensitively,
   * so "%0b" and "%0B" are both removed.
   *
   * NOTE: the replacement is repeated until nothing matches anymore, so a $replacement that
   *       itself matches one of the patterns would presumably never terminate.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url-encoded variants (e.g. "%00") of the characters.</p>
   * @param string $replacement <p>The string that is inserted instead of the removed characters.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    if ($url_encoded) {
      // hex digits in url-encoding may be upper- or lowercase, hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat, because removing a sequence can join the surrounding bytes into a new match (e.g. "%%000")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3924
3925
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement, an empty string removes the chars.</p>
   * @param bool   $processInvalidUtf8 <p>
   *                                   Also handle invalid UTF-8 byte sequences (via mbstring),
   *                                   not only already existing U+FFFD characters.
   *                                   </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // "mb_substitute_character()" only accepts a code point or a keyword ("none", "long", "entity"),
      // not an arbitrary string. So invalid sequences are either dropped ("none") or turned into
      // U+FFFD, which the str_replace() below swaps for the requested replacement.
      $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      // the conversion may fail with false, the result must stay a string
      $str = (string)\mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (the diamond question mark)
    return str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
3975
3976
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted or empty, characters of
   *                      the unicode categories "Z" (separators) and "C" (control & other) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP but a valid character to strip (e.g. trailing zeros),
    // so it must not fall back to the default behaviour
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3999
4000
  /**
   * Build a regex fragment that matches any one of the characters of the given string.
   *
   * The result is either a character class like "[abc]" or, if some elements cannot
   * be put into a character class, a non-capturing alternation like "(?:[abc]|xyz)".
   * Results are cached per "$s . $class" for the current request.
   *
   * @param string $s     <p>The characters that should be matched.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>The regex fragment (without delimiters).</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $cache = array();

    $cacheKey = $s . $class;

    if (isset($cache[$cacheKey])) {
      return $cache[$cacheKey];
    }

    // index 0: content of the character class, all further entries: separate alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $element) {
      if ('-' === $element) {
        // a hyphen is put at the very beginning, where it is taken literally
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($element[2])) {
        // at most two bytes: quote it for usage inside the pattern
        $parts[0] .= preg_quote($element, '/');
      } elseif (1 === self::strlen($element)) {
        // one character with three or more bytes: add it unquoted
        $parts[0] .= $element;
      } else {
        // more than one character: needs its own alternative
        $parts[] = $element;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    if (1 === count($parts)) {
      $pattern = $parts[0];
    } else {
      $pattern = '(?:' . implode('|', $parts) . ')';
    }

    $cache[$cacheKey] = $pattern;

    return $pattern;
  }
4048
4049 1
  /**
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
   *
   * One line per entry of "self::$SUPPORT" is printed as "key - value", followed by "\n<br>".
   * The values are exported via "var_export()", so that e.g. false is visible as "false"
   * instead of an empty string.
   *
   * @return void
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $supportKey => $supportValue) {
      echo $supportKey . ' - ' . var_export($supportValue, true) . "\n<br>";
    }
  }
4062
4063
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is returned untouched, if requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // the default encoding needs no normalization
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4095 1
4096
  /**
   * Convert a string to an array of Unicode characters.
   *
   * If PCRE supports UTF-8, the string is split via regex. Otherwise the bytes are scanned
   * manually and every well-formed 1-4 byte sequence becomes one element, bytes that do not
   * belong to such a sequence are skipped.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    $ret = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: scan the string byte by byte

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // byte length, independent of a possible mbstring function overloading
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteLength = \mb_strlen($str, '8BIT');
      } else {
        $byteLength = strlen($str);
      }

      for ($i = 0; $i < $byteLength; $i++) {
        $leadByte = $str[$i];

        // 0xxxxxxx: single byte (ASCII)
        if (($leadByte & "\x80") === "\x00") {
          $ret[] = $leadByte;

          continue;
        }

        // the lead byte announces the length of the sequence
        if (($leadByte & "\xE0") === "\xC0") {
          $sequenceLength = 2; // 110xxxxx
        } elseif (($leadByte & "\xF0") === "\xE0") {
          $sequenceLength = 3; // 1110xxxx
        } elseif (($leadByte & "\xF8") === "\xF0") {
          $sequenceLength = 4; // 11110xxx
        } else {
          // not a valid lead byte: skip it
          continue;
        }

        // truncated sequence at the end of the string: skip the lead byte
        if (!isset($str[$i + $sequenceLength - 1])) {
          continue;
        }

        // every following byte must be a continuation byte (10xxxxxx)
        $char = $leadByte;
        for ($offset = 1; $offset < $sequenceLength; $offset++) {
          if (($str[$i + $offset] & "\xC0") !== "\x80") {
            // malformed sequence: skip only the lead byte
            continue 2;
          }

          $char .= $str[$i + $offset];
        }

        $ret[] = $char;

        // jump over the continuation bytes that were consumed
        $i += $sequenceLength - 1;
      }
    }

    if ($length > 1) {
      // glue the single characters together into chunks of $length characters
      return array_map(
          function ($chunk) {
            return implode('', $chunk);
          },
          array_chunk($ret, $length)
      );
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4220 1
4221
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed list of encodings and finally a
   * round-trip via "iconv()" for every entry of "self::$ICONV_ENCODING".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // evaluate each check only once, the input can be large
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from the encoding into
    // the same encoding gives back the unchanged string.

    $strAsString = (string)$str;
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // compare the strings directly instead of their md5-hashes; a failed
      // conversion (false) is compared as empty string, like md5(false) was
      if ((string)$roundTrip === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
4321
4322
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if the haystack or the needle is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end of the haystack as the needle has
    $haystackEnd = self::substr($haystack, -self::strlen($needle));

    return $needle === $haystackEnd;
  }
4345
4346
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if the haystack or the needle is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end of the haystack as the needle has
    $haystackEnd = self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($haystackEnd, $needle) === 0;
  }
4369
4370
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search strings are converted into quoted, case-insensitive UTF-8 patterns
   * and replaced via "preg_replace()". The replacements are taken literally,
   * "$1" or "\1" in a replacement is not interpreted as back-reference.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search string never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string or an array of replacement strings.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // this pattern can never match, so an empty needle replaces nothing
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$", otherwise preg_replace() would interpret sequences
    // like "$0" or "\1" in the replacement as back-references
    if (is_array($replace) === true) {
      $replacements = array();
      foreach ($replace as $replacement) {
        $replacements[] = addcslashes((string)$replacement, '\\$');
      }
    } else {
      $replacements = addcslashes((string)$replace, '\\$');
    }

    $subject = preg_replace($patterns, $replacements, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
4413 2
4414
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if the haystack or the needle is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the haystack starts with the needle, if the needle is found at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4437
4438
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * Strings that are not longer than the limit are returned unchanged. Otherwise the
   * string is cut, a word that was cut in the middle is dropped (if there is a
   * space to cut at) and the add-on is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximal number of characters.</p>
   * @param string $strAddOn <p>The string that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit is directly behind a word: cut off the space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut at the limit and drop the last (possibly incomplete) word
    $str = self::substr($str, 0, $length);
    $words = explode(' ', $str);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // there was no space to cut at: hard cut
    if ($withoutLastWord === '') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4478
4479
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged, if the pad length is not an integer, not bigger
   * than 0, smaller than the length of the input or if the pad string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);
    $ps_length = self::strlen($pad_string);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
        &&
        // an empty pad string cannot fill anything (and would cause a division by zero below)
        $ps_length > 0
    ) {
      $diff = $pad_length - $str_length;

      // number of characters that are added on the left / on the right
      switch ($pad_type) {
        case STR_PAD_LEFT:
          $preLength = $diff;
          $postLength = 0;
          break;

        case STR_PAD_BOTH:
          // for an odd difference the right side gets the extra character
          $preLength = (int)floor($diff / 2);
          $postLength = $diff - $preLength;
          break;

        case STR_PAD_RIGHT:
        default:
          $preLength = 0;
          $postLength = $diff;
      }

      // repeat the pad string often enough and cut it to the exact length
      $pre = '';
      if ($preLength > 0) {
        $pre = (string)self::substr(
            str_repeat($pad_string, (int)ceil($preLength / $ps_length)),
            0,
            $preLength
        );
      }

      $post = '';
      if ($postLength > 0) {
        $post = (string)self::substr(
            str_repeat($pad_string, (int)ceil($postLength / $ps_length)),
            0,
            $postLength
        );
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4533
4534
  /**
4535 15
   * Repeat a string.
4536
   *
4537 15
   * @param string $str        <p>
4538 15
   *                           The string to be repeated.
4539
   *                           </p>
4540 15
   * @param int    $multiplier <p>
4541 2
   *                           Number of times the input string should be
4542
   *                           repeated.
4543
   *                           </p>
4544
   *                           <p>
4545 14
   *                           multiplier has to be greater than or equal to 0.
4546
   *                           If the multiplier is set to 0, the function
4547
   *                           will return an empty string.
4548
   *                           </p>
4549 14
   *
4550
   * @return string <p>The repeated string.</p>
4551
   */
4552
  public static function str_repeat($str, $multiplier)
  {
    // The input is passed through self::filter() first; the native
    // str_repeat() then does the actual repetition.
    $filtered = self::filter($str);
    $repeated = str_repeat($filtered, $multiplier);

    return $repeated;
  }
4558 2
4559
  /**
4560 14
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
4561
   *
4562
   * Replace all occurrences of the search string with the replacement string
4563
   *
4564
   * @link http://php.net/manual/en/function.str-replace.php
4565
   *
4566 14
   * @param mixed $search  <p>
4567 2
   *                       The value being searched for, otherwise known as the needle.
4568 14
   *                       An array may be used to designate multiple needles.
4569 14
   *                       </p>
4570 14
   * @param mixed $replace <p>
4571 1
   *                       The replacement value that replaces found search
4572
   *                       values. An array may be used to designate multiple replacements.
4573
   *                       </p>
4574 14
   * @param mixed $subject <p>
4575 14
   *                       The string or array being searched and replaced on,
4576
   *                       otherwise known as the haystack.
4577
   *                       </p>
4578
   *                       <p>
4579
   *                       If subject is an array, then the search and
4580
   *                       replace is performed with every entry of
4581
   *                       subject, and the return value is an array as
4582
   *                       well.
4583
   *                       </p>
4584
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4585
   *
4586
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4587
   */
4588
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // Plain delegation to the native function; $count is forwarded by
    // reference so the caller receives the number of replacements.
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4592
4593
  /**
4594
   * Replace the first "$search"-term with the "$replace"-term.
4595
   *
4596
   * @param string $search
4597
   * @param string $replace
4598
   * @param string $subject
4599
   *
4600
   * @return string
4601
   */
4602
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    // No occurrence at all: hand the subject back untouched.
    if ($firstHit === false) {
      return $subject;
    }

    // Replace exactly the characters covered by the first match.
    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstHit, $searchLength);
  }
4612
4613
  /**
4614
   * Shuffles all the characters in the string.
4615
   *
4616
   * @param string $str <p>The input string</p>
4617
   *
4618
   * @return string <p>The shuffled string.</p>
4619
   */
4620 1
  public static function str_shuffle($str)
  {
    // Break the string into the pieces returned by self::split(),
    // randomize their order and glue them back together.
    $pieces = self::split($str);
    shuffle($pieces);

    $shuffled = implode('', $pieces);

    return $shuffled;
  }
4628
4629
  /**
4630
   * Sort all characters according to code points.
4631
   *
4632
   * @param string $str    <p>A UTF-8 string.</p>
4633 1
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4634
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4635
   *
4636
   * @return string <p>String of sorted characters.</p>
4637
   */
4638
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    // A double flip drops repeated values (value -> key -> value).
    if ($unique) {
      $codePoints = array_flip(array_flip($codePoints));
    }

    // Sort by value; the direction depends on $desc.
    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    // Turn the ordered code points back into a string.
    return self::string($codePoints);
  }
4654
4655
  /**
4656
   * Split a string into an array.
4657
   *
4658
   * @param string $str
4659
   * @param int    $len
4660
   *
4661
   * @return array
4662
   */
4663
  public static function str_split($str, $len = 1)
  {
    $chunkSize = (int)$len;
    $input = (string)$str;

    if ($input === '') {
      return array();
    }

    // Invalid chunk sizes are delegated to the native function so that its
    // own error handling applies.
    if ($chunkSize < 1) {
      return str_split($input, $chunkSize);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $input, $matches);
    $clusters = $matches[0];

    if ($chunkSize === 1) {
      return $clusters;
    }

    // Group the matched clusters: every $chunkSize-th index opens a new
    // chunk, all other indexes are appended to the current one.
    $chunks = array();
    $current = -1;

    foreach ($clusters as $index => $cluster) {
      if ($index % $chunkSize === 0) {
        $chunks[++$current] = $cluster;
        continue;
      }

      $chunks[$current] .= $cluster;
    }

    return $chunks;
  }
4699
4700
  /**
4701
   * Check if the string starts with the given substring.
4702
   *
4703
   * @param string $haystack <p>The string to search in.</p>
4704
   * @param string $needle   <p>The substring to search for.</p>
4705
   *
4706
   * @return bool
4707 1
   */
4708 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // The needle is a prefix exactly when its first occurrence is at position 0.
    return self::strpos($haystack, $needle) === 0;
  }
4723
4724
  /**
4725
   * Get a binary representation of a specific string.
4726
   *
4727
   * @param string $str <p>The input string.</p>
4728
   *
4729 11
   * @return string
4730
   */
4731 11
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // hex dump of the raw bytes, two hex digits per byte
    $value = unpack('H*', $str);
    $hex = $value[1];

    // Expand digit by digit instead of using base_convert() on the whole
    // number: base_convert() loses precision for large values, which breaks
    // any input longer than a few bytes.
    $binary = '';
    $hexLength = strlen($hex);
    for ($i = 0; $i < $hexLength; ++$i) {
      $binary .= str_pad(decbin(hexdec($hex[$i])), 4, '0', STR_PAD_LEFT);
    }

    // Keep the previous output format: no leading zeros, and "0" for an
    // empty or all-zero input.
    $binary = ltrim($binary, '0');

    return $binary === '' ? '0' : $binary;
  }
4739 11
4740 2
  /**
4741
   * Convert a string into an array of words.
4742
   *
4743
   * @param string   $str
4744 10
   * @param string   $charlist
4745 10
   * @param bool     $removeEmptyValues
4746
   * @param null|int $removeShortValues
4747
   *
4748
   * @return array
4749 10
   */
4750
  public static function str_to_words($str, $charlist = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    // null means "do not filter by length"; anything else is used as an integer limit
    $shortLimit = $removeShortValues === null ? null : (int)$removeShortValues;

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charlist = self::rxClass($charlist, '\pL');

    // Split on words and keep them (PREG_SPLIT_DELIM_CAPTURE), so words and
    // the text between them alternate in the result.
    $parts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filtering requested
    if ($shortLimit === null && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = array();
    foreach ($parts as $part) {
      // drop entries that are not longer than the limit
      if ($shortLimit !== null && self::strlen($part) <= $shortLimit) {
        continue;
      }

      // drop entries that contain nothing but whitespace
      if ($removeEmptyValues === true && trim($part) === '') {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4802
4803
  /**
4804
   * alias for "UTF8::to_ascii()"
4805
   *
4806
   * @see UTF8::to_ascii()
4807
   *
4808
   * @param string $str
4809
   * @param string $unknown
4810
   * @param bool   $strict
4811
   *
4812
   * @return string
4813 10
   */
4814
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // pure alias: all arguments are forwarded unchanged to self::to_ascii()
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4818
4819 10
  /**
4820 2
   * Counts number of words in the UTF-8 string.
4821 2
   *
4822
   * @param string $str      <p>The input string.</p>
4823 10
   * @param int    $format   [optional] <p>
4824 10
   *                         <strong>0</strong> => return a number of words (default)<br />
4825 2
   *                         <strong>1</strong> => return an array of words<br />
4826
   *                         <strong>2</strong> => return an array of words with word-offset as key
4827
   *                         </p>
4828 8
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
4829
   *
4830
   * @return array|int <p>The number of words in the string</p>
4831
   */
4832
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // The words sit at the odd indexes of the list returned by
    // self::str_to_words(); the even indexes hold the text between them.
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    // default format: only the number of words
    if ($format !== 1 && $format !== 2) {
      return ($total - 1) / 2;
    }

    $words = array();

    // format 1: plain list of words
    if ($format === 1) {
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // format 2: words keyed by their character offset in the input
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $total; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
4862 2
4863 2
  /**
4864
   * Case-insensitive string comparison.
4865
   *
4866
   * INFO: Case-insensitive version of UTF8::strcmp()
4867
   *
4868
   * @param string $str1
4869
   * @param string $str2
4870
   *
4871
   * @return int <p>
4872
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4873
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4874
   *             <strong>0</strong> if they are equal.
4875
   *             </p>
4876
   */
4877
  public static function strcasecmp($str1, $str2)
  {
    // Case-fold both operands, then reuse the case-sensitive comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4881
4882 11
  /**
4883
   * alias for "UTF8::strstr()"
4884 11
   *
4885 11
   * @see UTF8::strstr()
4886 11
   *
4887
   * @param string  $haystack
4888 11
   * @param string  $needle
4889 1
   * @param bool    $before_needle
4890 1
   * @param string  $encoding
4891 1
   * @param boolean $cleanUtf8
4892
   *
4893 11
   * @return string|false
4894
   */
4895 11
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged to self::strstr()
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4899 1
4900
  /**
4901
   * Case-sensitive string comparison.
4902 11
   *
4903 11
   * @param string $str1
4904
   * @param string $str2
4905 11
   *
4906
   * @return int  <p>
4907 11
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
4908
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
4909
   *              <strong>0</strong> if they are equal.
4910
   *              </p>
4911
   */
4912
  public static function strcmp($str1, $str2)
  {
    // Operands with identical string representations are equal; no
    // normalization is needed for them.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the NFD-normalized forms with the native function.
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4920
4921 21
  /**
4922
   * Find length of initial segment not matching mask.
4923
   *
4924 21
   * @param string $str
4925
   * @param string $charList
4926 21
   * @param int    $offset
4927 6
   * @param int    $length
4928
   *
4929
   * @return int|null
4930 19
   */
4931
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    // an empty mask yields null
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // Only cut the string when a non-default offset or length was given.
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first character of the mask.
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    // no mask character found: the whole string is the segment
    return self::strlen($str);
  }
4953
4954
  /**
4955
   * alias for "UTF8::stristr()"
4956
   *
4957
   * @see UTF8::stristr()
4958
   *
4959
   * @param string  $haystack
4960
   * @param string  $needle
4961
   * @param bool    $before_needle
4962
   * @param string  $encoding
4963
   * @param boolean $cleanUtf8
4964
   *
4965
   * @return string|false
4966 16
   */
4967
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged to self::stristr()
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4971 2
4972
  /**
4973
   * Create a UTF-8 string from code points.
4974 15
   *
4975
   * INFO: opposite to UTF8::codepoints()
4976
   *
4977
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4978
   *
4979
   * @return string <p>UTF-8 encoded string.</p>
4980 15
   */
4981 2
  public static function string(array $array)
  {
    // Convert every code point with self::chr() and append the results in
    // the array's iteration order.
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4994
4995
  /**
4996
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4997
   *
4998
   * @param string $str <p>The input string.</p>
4999
   *
5000
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
5001 1
   */
5002
  public static function string_has_bom($str)
  {
    // The BOM byte sequences are the keys of self::$BOM; the values are not
    // needed for this check.
    $bomStrings = array_keys(self::$BOM);

    foreach ($bomStrings as $bomString) {
      // a BOM only counts when it sits at the very beginning
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5012 1
5013 1
  /**
5014
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
5015 1
   *
5016 1
   * @link http://php.net/manual/en/function.strip-tags.php
5017
   *
5018 1
   * @param string  $str            <p>
5019
   *                                The input string.
5020
   *                                </p>
5021
   * @param string  $allowable_tags [optional] <p>
5022
   *                                You can use the optional second parameter to specify tags which should
5023
   *                                not be stripped.
5024
   *                                </p>
5025
   *                                <p>
5026
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
5027
   *                                can not be changed with allowable_tags.
5028
   *                                </p>
5029
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
5030 1
   *
5031
   * @return string <p>The stripped string.</p>
5032 1
   */
5033 1
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // optionally run the input through self::clean() before stripping
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
5047
5048
  /**
5049
   * Finds position of first occurrence of a string within another, case insensitive.
5050
   *
5051
   * @link http://php.net/manual/en/function.mb-stripos.php
5052
   *
5053
   * @param string  $haystack  <p>
5054
   *                           The string from which to get the position of the first occurrence
5055
   *                           of needle
5056
   *                           </p>
5057
   * @param string  $needle    <p>
5058
   *                           The string to find in haystack
5059 47
   *                           </p>
5060
   * @param int     $offset    [optional] <p>
5061
   *                           The position in haystack
5062 47
   *                           to start searching
5063
   *                           </p>
5064 47
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5065 9
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5066
   *
5067
   * @return int|false <p>
5068 45
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
5069
   *                   or false if needle is not found.
5070
   *                   </p>
5071
   */
5072 1
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   &&
                   self::$SUPPORT['intl'] === true
                   &&
                   Bootup::is_php('5.4') === true;

    if (!$useGrapheme) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
5116
5117
  /**
5118
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5119
   *
5120
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5121
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5122
   * @param bool    $before_needle [optional] <p>
5123
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
5124
   *                               haystack before the first occurrence of the needle (excluding the needle).
5125
   *                               </p>
5126
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
5127
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5128
   *
5129
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
5130
   */
5131 View Code Duplication
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = $encoding === 'UTF-8';

    // Without mbstring only UTF-8 can be handled; warn, but keep going.
    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: intl (the grapheme function can't handle other encodings)
    if (
        $isUtf8
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle
        ? $match[1]
        : self::substr($haystack, self::strlen($match[1]));
  }
5190
5191
  /**
5192 1
   * Get the string length, not the byte-length!
5193
   *
5194
   * @link     http://php.net/manual/en/function.mb-strlen.php
5195
   *
5196
   * @param string  $str       <p>The string being checked for length.</p>
5197
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5198
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5199
   *
5200
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
5201
   *             character counted as +1)</p>
5202
   */
5203
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // This guard has to run before the first read of self::$SUPPORT. It used
    // to sit below the switch, which already reads
    // self::$SUPPORT['mbstring_func_overload'].
    // NOTE(review): presumably checkForSupport() is what fills these flags - confirm.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte encodings: the length is the number of bytes
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        if (
            $encoding === 'CP850'
            &&
            self::$SUPPORT['mbstring_func_overload'] === false
        ) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // non-UTF-8 input with iconv but without mbstring
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === true
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      return \iconv_strlen($str, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5293
5294
  /**
5295 1
   * Case insensitive string comparisons using a "natural order" algorithm.
5296 1
   *
5297
   * INFO: natural order version of UTF8::strcasecmp()
5298 1
   *
5299 1
   * @param string $str1 <p>The first string.</p>
5300
   * @param string $str2 <p>The second string.</p>
5301 1
   *
5302
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
5303 1
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
5304 1
   *             <strong>0</strong> if they are equal
5305
   */
5306 1
  public static function strnatcasecmp($str1, $str2)
  {
    // Case-fold both operands, then reuse the natural-order comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5310 1
5311
  /**
5312 1
   * String comparisons using a "natural order" algorithm
5313
   *
5314
   * INFO: natural order version of UTF8::strcmp()
5315
   *
5316
   * @link  http://php.net/manual/en/function.strnatcmp.php
5317
   *
5318
   * @param string $str1 <p>The first string.</p>
5319
   * @param string $str2 <p>The second string.</p>
5320
   *
5321
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
5322
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
5323
   *             <strong>0</strong> if they are equal
5324
   */
5325
  public static function strnatcmp($str1, $str2)
  {
    // Operands with identical string representations are equal; no folding
    // is needed for them.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the results of self::strtonatfold() with the native function.
    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5329
5330
  /**
5331
   * Case-insensitive string comparison of the first n characters.
5332
   *
5333
   * @link  http://php.net/manual/en/function.strncasecmp.php
5334
   *
5335
   * @param string $str1 <p>The first string.</p>
5336
   * @param string $str2 <p>The second string.</p>
5337
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5338
   *
5339
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5340 1
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5341
   *             <strong>0</strong> if they are equal
5342 1
   */
5343
  public static function strncasecmp($str1, $str2, $len)
  {
    // Case-fold both operands, then reuse the length-limited comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5347
5348
  /**
5349
   * String comparison of the first n characters.
5350
   *
5351
   * @link  http://php.net/manual/en/function.strncmp.php
5352
   *
5353
   * @param string $str1 <p>The first string.</p>
5354 1
   * @param string $str2 <p>The second string.</p>
5355
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5356 1
   *
5357
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5358
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5359
   *             <strong>0</strong> if they are equal
5360
   */
5361
  public static function strncmp($str1, $str2, $len)
  {
    // Static analysis reports that self::substr() can return false instead
    // of a string. The casts make sure self::strcmp() (and the Normalizer
    // behind it) always receive real strings.
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    // compare only the first $len characters of both operands
    return self::strcmp($str1, $str2);
  }
5368 1
5369
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>The characters to look for. This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      The part of the haystack starting from the first character found,<br />
   *                      or false if none of the characters is found.
   *                      </p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // nothing to search in or nothing to search for
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $found = preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
    if (!$found) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $firstHit = strpos($haystack, $m[0]);

    return substr($haystack, $firstHit);
  }
5394
5395
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a code point has to be
    // converted into its character *before* the string-cast below hides the original type
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      // byte-wise search via the native function
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither of the encoding-aware extensions is available:
    // with iconv loaded, the branch below handles non UTF-8 encodings
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // fallback via vanilla php

    // keep the untouched arguments for the polyfill fallback at the very end
    $originalHaystack = $haystack;
    $originalOffset = $offset;

    // "self::substr()" is reported to be able to return false, normalize it to a string
    $haystack = (string)self::substr($haystack, $offset);

    // NOTE(review): for a negative $offset the position below is counted from the
    // beginning of the extracted tail, not from the beginning of the original haystack
    // (this is the pre-existing behavior) - TODO confirm whether that is intended.
    if ($offset < 0) {
      $offset = 0;
    }

    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // translate the byte position into a character position; the result of
    // "self::strlen()" is checked *before* the addition, because "int + false"
    // is always an int and would hide a failure
    $prefixLength = self::strlen(substr($haystack, 0, $pos));
    if ($prefixLength !== false) {
      return $offset + $prefixLength;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strpos($originalHaystack, $needle, $originalOffset, $encoding);
  }
5527
5528 2
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.<br />
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.<br />
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use; anything other than "UTF-8"
   *                              is passed through the encoding normalization first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // "\mb_strpos" and "\iconv_strpos" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5567
5568 20
  /**
   * Reverses characters order in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with characters in the reverse sequence.</p>
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    // an empty input has nothing to reverse
    if ($str === '') {
      return '';
    }

    // split into characters, flip their order and glue them together again
    $chars = self::split($str);
    $reversedChars = array_reverse($chars);

    return implode('', $reversedChars);
  }
5585
5586 19
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.<br />
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.<br />
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end.
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name to use; anything other than "UTF-8"
   *                               is passed through the encoding normalization first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $isUtf8 = ($encoding === 'UTF-8');
    if (!$isUtf8) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "\mb_strpos" and "\iconv_strpos" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5624 4
5625 4
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />
   *                   If needle is not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into its character first
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides and search case-sensitive

    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5705
5706
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>
   *                              May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary
   *                              point prior to the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />
   *                   If needle is not found, it returns false.
   *                   </p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into its character first
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset > 0) {
      // skip the leading characters, the offset is added again to the result
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // drop the trailing characters, the result needs no correction
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // translate the byte position into a character position
    $bytesBeforeMatch = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($bytesBeforeMatch);
  }
5800
5801 1
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start position passed on to "UTF8::substr()".</p>
   * @param int    $length [optional] <p>Length passed on to "UTF8::substr()".</p>
   *
   * @return int <p>The length of the initial segment, 0 if the string or the mask is empty or nothing matches.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // init
    $mask = (string)$mask; // a non-string mask would otherwise always fail the "isset()"-check below
    $length = (int)$length;
    $offset = (int)$offset;

    // only cut the input if the caller changed one of the defaults
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the input string
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5829
5830 1
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $parts);

    if (!isset($parts[1])) {
      return false;
    }

    $beforeNeedle = $parts[1];
    if ($before_needle) {
      return $beforeNeedle;
    }

    // skip the captured prefix, the rest starts with the needle
    return self::substr($haystack, self::strlen($beforeNeedle));
  }
5902
5903
  /**
   * Case-fold a string so that it can be used for case-less matching.
   *
   * The static table UTF8::$COMMON_CASE_FOLD is always applied; with $full the
   * table loaded via getData('caseFolding_full') is applied on top. The result
   * is finally passed through UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, also replace the full case folding chars (default)<br />
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // lookup tables are built once and then reused by every later call
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    // cleaning runs after the folding tables were applied, right before lowercasing
    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
5954
5955
  /**
   * Make a string lowercase.
   *
   * Without $lang the work is delegated to "\mb_strtolower()". With $lang the
   * intl transliterator "<lang>-Lower" is used when intl and PHP >= 5.4 are
   * available; otherwise a warning is raised and "\mb_strtolower()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $canTransliterate = self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true;
    if ($canTransliterate === false) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the language independent transliterator instead
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6012
6013
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and every run of non-spacing marks
   * (\p{Mn}) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6025
6026
  /**
   * Make a string uppercase.
   *
   * Without $lang the work is delegated to "\mb_strtoupper()". With $lang the
   * intl transliterator "<lang>-Upper" is used when intl and PHP >= 5.4 are
   * available; otherwise a warning is raised and "\mb_strtoupper()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the language independent transliterator instead
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning has to name this method, not strtolower()
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6082
6083 6
  /**
   * Translate characters or replace sub-strings.
   *
   * Three call forms are handled:
   * - strtr($str, $from, $to): both strings are split via UTF8::str_split() and
   *   the n-th character of $from is replaced by the n-th character of $to;
   *   the longer of the two lists is truncated to the length of the shorter one.
   * - strtr($str, array $pairs): passed straight to the native "strtr()".
   * - strtr($str, string $from): every occurrence of $from is removed, because
   *   the native two-argument "strtr()" only accepts an array.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from, or an array of replace pairs.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the "argument was not passed" marker
    if (INF !== $to) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);
      $pairCount = min(count($fromChars), count($toChars));

      if ($pairCount === 0) {
        // nothing to map; also avoids "array_combine()" with empty arrays, which fails on PHP 5.3
        return (string)$str;
      }

      return strtr(
          $str,
          array_combine(
              array_slice($fromChars, 0, $pairCount),
              array_slice($toChars, 0, $pairCount)
          )
      );
    }

    if (is_string($from)) {
      // native "strtr()" with two arguments rejects a string, so drop the given sub-string instead
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6116
6117
  /**
   * Return the width of a string, as reported by "\mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    // iconv and mbstring are not tolerant to invalid encoding, so optionally clean the input first
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // resolved by the mbstring extension or, if that is missing, by the polyfill
    return \mb_strwidth($input, $charset);
  }
6141
6142
  /**
   * Get part of a string.
   *
   * The first available backend is used, in this order: native "substr()"
   * (only for CP850 without mbstring function overloading), "\mb_substr()",
   * "\grapheme_substr()", "\iconv_substr()" and finally a pure PHP fallback
   * that slices the array returned by UTF8::split().
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      Returns a sub-string specified by the start and length parameters,
   *                      an empty string for empty input, or <b>false</b> if $start is
   *                      greater than the length of the string.
   *                      </p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed to validate $start or to default $length
    $charCount = ($start || $length === null) ? (int)self::strlen($str, $encoding) : 0;

    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      // $length was already resolved to an integer above
      return substr($str, $start, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $start, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    // fallback via vanilla php: split into characters, cut out the wanted part and glue it together again
    $chars = self::split($str);

    return implode('', array_slice($chars, $start, $length));
  }
6245 3
6246
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * $main_str is cut down to the requested slice, $str is cut down to the length
   * of that slice, and both parts are then compared via UTF8::strcmp() or
   * UTF8::strcasecmp().
   *
   * @param string  $main_str           <p>The main string being compared.</p>
   * @param string  $str                <p>The secondary string being compared.</p>
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
   *                                    the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // UTF8::substr() returns false if $offset lies behind the end of the string;
    // use an empty slice in that case, so that only strings are handed on
    $mainPart = self::substr($main_str, $offset, $length);
    if ($mainPart === false) {
      $mainPart = '';
    }

    $otherPart = self::substr($str, 0, (int)self::strlen($mainPart));
    if ($otherPart === false) {
      $otherPart = '';
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
6267
6268
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If omitted (null), everything after the offset is searched.
   *                           </p>
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The number of occurrences, or <b>false</b> if the haystack or the needle is
   *                   empty (or, before PHP 7.1, if offset + length is not positive).
   *                   </p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // an omitted $length has to stay null, so that UTF8::substr() returns everything
      // after $offset; casting it to 0 would reduce the haystack to an empty string
      $haystack = self::substr($haystack, $offset, $length === null ? null : (int)$length, $encoding);

      if ($haystack === false) {
        // $offset lies behind the end of the haystack: nothing left to search in
        $haystack = '';
      }
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);

      // cleaning may strip the whole needle, and an empty needle cannot be counted
      if ((string)$needle === '') {
        return false;
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the literal matches of the needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6342
6343
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>An empty string for an empty haystack; the unchanged haystack if the needle is empty
   *                or is not a (case-insensitive) prefix; otherwise the result of "UTF8::substr()" starting
   *                right after the prefix.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be stripped from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the input untouched
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // skip as many characters as the prefix is long
    return self::substr($haystack, self::strlen($needle));
  }
6371
6372
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>An empty string for an empty haystack; the unchanged haystack if the needle is empty
   *                or is not a (case-insensitive) suffix; otherwise the result of "UTF8::substr()" that
   *                keeps everything before the suffix.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be stripped from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the input untouched
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep the leading part: total length minus the length of the suffix
    return self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
  }
6400
6401
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>An empty string for an empty haystack; the unchanged haystack if the needle is empty
   *                or is not a prefix; otherwise the result of "UTF8::substr()" starting right after the
   *                prefix.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be stripped from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the input untouched
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // skip as many characters as the prefix is long
    return self::substr($haystack, self::strlen($needle));
  }
6429
6430
  /**
   * Replace text within a portion of a string (character based, not byte based).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, every element is processed on its own and an array is returned; $replacement,
   * $start and $length may then be arrays as well (cut to the size of $str) or scalars (used for
   * every element). The result array is built via "array_map()" over several arrays.
   *
   * @param string|string[]  $str         <p>The input string or an array of strings.</p>
   * @param string|string[]  $replacement <p>The replacement string or an array of strings. If $str is a string
   *                                      and this is an array, only the element with key 0 is used.</p>
   * @param int|int[]        $start       <p>
   *                                      If start is positive, the replacing will begin at the start'th offset
   *                                      into string.
   *                                      <br /><br />
   *                                      If start is negative, the replacing will begin at the start'th character
   *                                      from the end of string.
   *                                      <br /><br />
   *                                      Non-integer entries of a start-array are treated as 0.
   *                                      </p>
   * @param int|int[]|null   $length      [optional] <p>If given and is positive, it represents the length of the
   *                                      portion of string which is to be replaced. If it is negative, it
   *                                      represents the number of characters from the end of string at which to
   *                                      stop replacing. If length is zero then this function will have the
   *                                      effect of inserting replacement into string at the given start offset.
   *                                      <br /><br />
   *                                      If it is not given and $str is a string, it defaults to the length of
   *                                      the string, i.e. the replacing ends at the end of the string.
   *                                      <br /><br />
   *                                      NOTE: if it is not given and $str is an array, a length of 0 is used
   *                                      for every element, i.e. the replacement is inserted.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // An empty input must give an empty result. Without this guard the scalar arguments below
      // still become one-element arrays ("array_pad()" never shrinks) and "array_map()" would pad
      // the empty $str up to that size, inventing an element.
      if ($num === 0) {
        return array();
      }

      // $replacement: one entry per input string
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one entry per input string, non-integers fall back to 0
      if (is_array($start) === true) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$startTmp) {
          $startTmp = is_int($startTmp) === true ? $startTmp : 0;
        }
        unset($startTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one entry per input string
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$lengthTmp) {
          if (isset($lengthTmp)) {
            // non-integers fall back to the number of input strings (behaviour of the original gist)
            $lengthTmp = is_int($lengthTmp) === true ? $lengthTmp : $num;
          } else {
            $lengthTmp = 0;
          }
        }
        unset($lengthTmp);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, once per element
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single input string can only take a single replacement
    if (is_array($replacement) === true) {
      // isset() avoids an "undefined offset" notice for arrays without key 0
      $replacement = isset($replacement[0]) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into arrays of single (multi-byte) characters
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    // "array_splice()" applies the same offset / length rules to the character list
    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6526
6527
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>An empty string for an empty haystack; the unchanged haystack if the needle is empty
   *                or is not a suffix; otherwise the result of "UTF8::substr()" that keeps everything
   *                before the suffix.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be stripped from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the input untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep the leading part: total length minus the length of the suffix
    return self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
  }
6554
6555
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the character
   * (e.g. it already was upper case), it is lower-cased instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // optionally run the input through "UTF8::clean()" before it is matched with the "/u" pattern
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // swaps the case of one matched character
    $swapChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by upper-casing => return the lower case variant
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapChar, $str);
  }
6598
6599
  /**
   * Alias for "UTF8::to_ascii()", all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed as "$unknown" to "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed as "$strict" to "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>Use "UTF8::to_ascii()" instead.</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    return self::to_ascii($s, $subst_chr, $strict);
  }
6616
6617
  /**
   * Alias for "UTF8::to_iso8859()", the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>Use "UTF8::to_iso8859()" instead.</p>
   */
  public static function toIso8859($str)
  {
    return self::to_iso8859($str);
  }
6632
6633
  /**
   * Alias for "UTF8::to_latin1()", the argument is passed through unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>Use "UTF8::to_latin1()" instead.</p>
   */
  public static function toLatin1($str)
  {
    return self::to_latin1($str);
  }
6648
6649
  /**
   * Alias for "UTF8::to_utf8()" without decoding of html-entities.
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>Use "UTF8::to_utf8()" instead.</p>
   */
  public static function toUTF8($str)
  {
    return self::to_utf8($str);
  }
6664
6665
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned via "UTF8::clean()" first and returned as is, if it only contains ASCII
   * afterwards. Otherwise every non-ASCII character is decoded to its code point and looked up in
   * a per-"bank" (code point >> 8) table loaded via "self::getData()"; characters without an entry
   * are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // lookup tables, cached per bank for the lifetime of the request
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $str = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    // split into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset the code point for every character: otherwise the value of a previously decoded
      // character would be reused and the "!isset($ord)" check below could never trigger.
      $ord = null;

      $ordC1 = ord($c[1]);

      // 2 bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // 3 bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // 4 bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // 5 bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // 6 bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF are never decoded above
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be decoded from this lead byte
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the lookup table, the lower 8 bits the entry in it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the missing table, so it is not requested again
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // missing char in the lookup table
        $c = $unknown;
      }
    }
    // break the reference of the foreach-by-reference loop
    unset($c);

    return implode('', $chars);
  }
6820
6821
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>The result of "UTF8::utf8_decode()" for a non-empty string, an empty string
   *                         for empty input, or an array of results for array input.</p>
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      // "array_map()" with a single array preserves the keys
      return array_map(array(__CLASS__, __FUNCTION__), $str);
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
6850
6851
  /**
   * Alias for "UTF8::to_iso8859()", the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    return self::to_iso8859($str);
  }
6864
6865
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, if requested, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively), the keys are kept
    if (is_array($str) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // number of bytes; "\mb_strlen(..., '8BIT')" is used when "mbstring_func_overload" is flagged
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {

      $c1 = $str[$i];

      if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes (0x80 - 0xBF) have to follow this lead byte?
        if ($c1 <= "\xDF") {
          $expected = 1; // looks like 2 bytes UTF8
        } elseif ($c1 <= "\xEF") {
          $expected = 2; // looks like 3 bytes UTF8
        } elseif ($c1 <= "\xF7") {
          $expected = 3; // looks like 4 bytes UTF8
        } else {
          $expected = 0; // doesn't look like UTF8, but should be converted
        }

        $tail = '';
        $looksLikeUtf8 = $expected > 0;
        for ($j = 1; $j <= $expected; $j++) {
          // bytes behind the end of the string count as "\x00", which is no continuation byte
          $cn = $i + $j >= $max ? "\x00" : $str[$i + $j];

          if ($cn >= "\x80" && $cn <= "\xBF") {
            $tail .= $cn;
          } else {
            $looksLikeUtf8 = false;
            break;
          }
        }

        if ($looksLikeUtf8 === true) { // yeah, almost sure it's UTF8 already
          $buf .= $c1 . $tail;
          $i += $expected;
          continue;
        }

        // not valid UTF8 - only the lead byte is converted below

      } elseif (($c1 & "\xC0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$WIN1252_TO_UTF8[$ordC1];
          continue;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
        continue;
      }

      // Convert the single byte into a two byte sequence: the first byte is built from
      // "ord($c1) / 64" or-ed with 0xC0, the second one from the lower 6 bits or-ed with 0x80.
      $cc1 = self::chr_and_parse_int(ord($c1) / 64) | "\xC0";
      $cc2 = ($c1 & "\x3F") | "\x80";
      $buf .= $cc1 . $cc2;
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7002
7003
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped. If omitted (or empty), characters
   *                      of the unicode categories "Z" (separators) and "C" (control / other) are
   *                      stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is a valid character list, but it is "falsy" in PHP, so it needs to be
    // excluded from the "nothing was given" check.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    // NOTE(review): "ltrim()" / "rtrim()" are not visible here - confirm that they accept "0" as
    // character list too, otherwise "0" is still not stripped.
    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
7031
7032
  /**
   * Upper-cases the first character of a string and leaves the rest untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Charset passed on to the substr / strtoupper helpers.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // split into "first character" and "everything after it"
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
7045
7046
  /**
   * Alias for "UTF8::ucfirst()": upper-cases the first character of a word.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input word.</p>
   * @param string  $encoding  [optional] <p>Charset forwarded to ucfirst().</p>
   * @param boolean $cleanUtf8 [optional] <p>Forwarded to ucfirst().</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
7061
7062
  /**
   * Uppercase the first character of every word in the string.
   *
   * The string is split via str_to_words(), each non-empty fragment that is not
   * listed in $exceptions is passed through ucfirst(), and the fragments are
   * glued back together without any separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left unchanged (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Only a truly empty string short-circuits: "0" is falsy but valid input.
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Skip empty fragments only; a plain "!$word" would also drop the word "0".
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7111
7112
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Repeat the decoding until the string no longer changes.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal HTML entities so the entity decoder below picks them up
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    for (; ;) {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

      // stop after one pass, or as soon as a pass changes nothing
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return (string)$str;
  }
7162
7163
  /**
   * Return an array mapping "urlencoded" Windows-1252 bytes ("%20" .. "%FF") to UTF-8 characters.
   *
   * Entries that map to an empty string are kept exactly as found.
   * NOTE(review): "%A0" (NO-BREAK SPACE) and "%AD" (SOFT HYPHEN) are defined in
   * Windows-1252 but map to '' here - confirm whether that is deliberate.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        // 0x20 - 0x7F: ASCII
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 - 0x9F: Windows-1252 specific characters
        '%80' => '€', // EURO SIGN (was wrongly mapped to a backtick)
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        // 0xA0 - 0xFF: same code points as ISO-8859-1
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7399
7400
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through to_utf8() and the $UTF8_TO_WIN1252
   * replacement table. Afterwards the string is compacted in place, byte by byte:
   * two-byte sequences become a single byte when the code point is below 256
   * (otherwise "?"), three- and four-byte sequences always become "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the key / value lists are derived once and then reused by later calls
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length is needed, so bypass an overloaded strlen()
    $len = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    // $in = read position, $out = write position (never ahead of $in)
    $in = 0;
    $out = 0;
    while ($in < $len) {
      $lead = $str[$in] & "\xF0";

      if ($lead === "\xC0" || $lead === "\xD0") {
        // two-byte sequence: 5 payload bits from the lead byte, 6 from the continuation byte
        $highBits = ord($str[$in] & "\x1F") << 6;
        ++$in;
        $codePoint = $highBits | ord($str[$in] & "\x3F");
        $str[$out] = $codePoint < 256 ? self::chr_and_parse_int($codePoint) : '?';
      } elseif ($lead === "\xE0" || $lead === "\xF0") {
        // three- / four-byte sequence: not representable, skip its continuation bytes
        $str[$out] = '?';
        $in += ($lead === "\xF0") ? 3 : 2;
      } else {
        $str[$out] = $str[$in];
      }

      ++$in;
      ++$out;
    }

    // drop the stale bytes left behind the write position
    return self::substr($str, 0, $out, '8BIT');
  }
7462
7463
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, the result is additionally run through the
   * $CP1252_TO_UTF8 replacement table, but only if it contains a "\xC2" byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // without a "\xC2" byte the replacement table is not applied at all
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // build the search / replace lists once and reuse them on later calls
    if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7495
7496
  /**
   * Fix utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7509
7510
  /**
   * Returns the class' table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7526
7527
  /**
   * Limit the number of words in a string.
   *
   * If the string holds more than $words words, it is cut after the last
   * allowed word, right-trimmed and $strAddOn is appended; otherwise the
   * string is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    $words = (int)$words;

    if (!isset($str[0]) || $words < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$words runs of "non-whitespace + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // something matched and it is shorter than the input => the limit was exceeded
    if (isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0])) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7562
7563
  /**
   * Wraps a string to a given number of characters.
   *
   * A single-byte "mask" of the input is built (" " for a space, "?" for any
   * other character, "#" for an existing break), wrapped with the native
   * wordwrap(), and the resulting break positions are then replayed on the
   * list of real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // build the character list and its one-byte-per-character mask
    $mask = '';
    $chars = array();
    foreach (explode($break, $str) as $segmentIndex => $segment) {

      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        if (' ' === $char) {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
      // copy every character in front of the current break marker
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // the marker replaces an original break or a space: consume that character
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7630
7631
  /**
   * Returns the class' array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7640
7641
}
7642