Completed
Push — master ( 2679c3...178b6a )
by Lars
06:25
created

UTF8::str_replace_first()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 9
ccs 0
cts 0
cp 0
rs 9.6666
c 0
b 0
f 0
cc 2
eloc 5
nc 2
nop 3
crap 6
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $utf8ToWin1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $utf8MSWord = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $iconvEncoding = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $support = array();
808
809 1
  /**
   * Constructor.
   *
   * Creating an instance only triggers the environment detection done by
   * UTF8::checkForSupport(); no other state is set up here.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
816
817
  /**
   * Fetch one character of a string by its position: a multi-byte aware
   * counterpart of the "$str[1]" syntax.
   *
   * The actual lookup is delegated to UTF8::substr() with a length of 1.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single multi-byte character, or an empty string if the
   *                input is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;
    $pos = (int)$pos;

    // an empty input or a negative position can never yield a character
    if ($str === '' || $pos < 0) {
      return '';
    }

    return self::substr($str, $pos, 1);
  }
840 1
841
  /**
   * Make sure a string starts with the UTF-8 BOM.
   *
   * INFO: If UTF8::string_has_bom() reports that a BOM is already there,
   *       the input is returned untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string, prefixed with the UTF-8 BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // anything other than an explicit "false" means: leave the string alone
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
858
859
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it describes.
   *
   * The digits are decoded in groups of 8 bits. If the number of digits is not
   * a multiple of 8, the input is left-padded with zero bits, so a value whose
   * leading zeros were stripped (e.g. "1100001" for "a") still decodes correctly.
   *
   * INFO: Decoding byte by byte avoids two problems of a base_convert() round-trip:
   *       leading zero bits are not lost (which used to shift the last nibble,
   *       e.g. "00001010" became "\xa0" instead of "\n") and long inputs are not
   *       truncated by float precision.
   *
   * @param mixed $bin <p>String consisting of the characters 1|0; any other
   *                   character is ignored.</p>
   *
   * @return string <p>The decoded bytes, or an empty string if the input is empty
   *                or contains no binary digit.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // keep only the binary digits, everything else carries no information
    $bits = preg_replace('/[^01]/', '', (string)$bin);
    if ($bits === null || $bits === '') {
      return '';
    }

    // left-pad to whole bytes, so that the first group is interpreted correctly
    $missing = (8 - (strlen($bits) % 8)) % 8;
    if ($missing > 0) {
      $bits = str_repeat('0', $missing) . $bits;
    }

    $str = '';
    foreach (str_split($bits, 8) as $byte) {
      $str .= chr(bindec($byte));
    }

    return $str;
  }
874 1
875
  /**
   * Get the Byte Order Mark for UTF-8.
   *
   * INFO: the BOM values of other encodings (e.g. UTF-16 and UTF-32)
   *       are listed in UTF8::$bom
   *
   * @return string <p>The three bytes 0xEF 0xBB 0xBF.</p>
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
886 2
887
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string to run the callback on.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
901
902
  /**
   * Detect which UTF-8 related features the current environment offers and
   * remember the result in UTF8::$support.
   *
   * INFO: There is no need to call this manually, the methods that depend on
   *       the result trigger it themselves. Once the marker key is set,
   *       further calls return immediately.
   */
  public static function checkForSupport()
  {
    // detection already done -> nothing left to do
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$support['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // PCRE with UTF-8 support: http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
929 8
930 6
  /**
   * Build the character that belongs to a given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default encoding the work is handed to IntlChar::chr() when the
   * support check flagged it as available. Otherwise the UTF-8 byte sequence is
   * assembled by hand, converted to the requested encoding if that is not UTF-8,
   * and kept in a static cache keyed by code point and encoding.
   *
   * @param int    $code_point <p>The code point for which to generate a character.
   *                           Must be a real integer, other types are rejected.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, null if $code_point is not an integer
   *                     (or if IntlChar::chr() returns null).</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // strict type check: e.g. numeric strings and floats are not accepted
    if ((int)$code_point !== $code_point) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $utf8Requested = ($encoding === 'UTF-8');

    // fast path: let "IntlChar" do the work for plain UTF-8 output
    if ($utf8Requested && self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    if (!$utf8Requested) {
      $encoding = self::normalize_encoding($encoding);
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    $cacheKey = $code_point . $encoding;
    if (isset($cache[$cacheKey])) {
      return $cache[$cacheKey];
    }

    // limit the value to 21 bits, the maximum a 4-byte sequence can carry
    $cp = $code_point % 0x200000;

    if ($cp < 0x80) {
      // 1 byte: 0xxxxxxx
      $str = chr($cp);
    } elseif ($cp < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = chr(0xC0 | ($cp >> 6)) .
             chr(0x80 | ($cp & 0x3F));
    } elseif ($cp < 0x10000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = chr(0xE0 | ($cp >> 12)) .
             chr(0x80 | (($cp >> 6) & 0x3F)) .
             chr(0x80 | ($cp & 0x3F));
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = chr(0xF0 | ($cp >> 18)) .
             chr(0x80 | (($cp >> 12) & 0x3F)) .
             chr(0x80 | (($cp >> 6) & 0x3F)) .
             chr(0x80 | ($cp & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $cache[$cacheKey] = $str;

    return $str;
  }
989
990
  /**
991
   * Applies callback to all characters of a string.
992
   *
993
   * @param string|array $callback <p>The callback function.</p>
994
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
995
   *
996
   * @return array <p>The outcome of callback.</p>
997
   */
998 2
  public static function chr_map($callback, $str)
  {
    // Hand every element produced by split() to the callback and collect the results.
    return array_map($callback, self::split($str));
  }
1004 2
1005
  /**
1006 2
   * Generates an array of byte length of each character of a Unicode string.
1007
   *
1008
   * 1 byte => U+0000  - U+007F
1009 2
   * 2 byte => U+0080  - U+07FF
1010
   * 3 byte => U+0800  - U+FFFF
1011 2
   * 4 byte => U+10000 - U+10FFFF
1012 2
   *
1013 2
   * @param string $str <p>The original Unicode string.</p>
1014
   *
1015 1
   * @return array <p>An array of byte lengths of each character.</p>
1016 1
   */
1017 1
  public static function chr_size_list($str)
  {
    $input = (string)$str;

    // Nothing to measure for an empty string.
    if ($input === '') {
      return array();
    }

    // strlen() counts bytes, so each element becomes the byte length of one split() piece.
    $pieces = self::split($input);

    return array_map('strlen', $pieces);
  }
1027
1028 2
  /**
1029
   * Get a decimal code representation of a specific character.
1030
   *
1031
   * @param string $char <p>The input character.</p>
1032
   *
1033
   * @return int
1034
   */
1035
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // An empty string has no lead byte: return 0 instead of reading the
    // non-existent offset 0 (which raises "Uninitialized string offset").
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx => single byte, the value is the code point
      return $code;
    }

    // Derive the sequence length from the lead byte and strip its marker bits.
    $bytes = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    // Fold in the continuation bytes (10xxxxxx).
    for ($i = 1; $i < $bytes; $i++) {
      // A truncated sequence must not read past the end of the string;
      // a missing continuation byte contributes zero bits.
      // NOTE(review): presumably the same numeric result as before (ord('') == 0) - confirm.
      $continuation = isset($char[$i]) ? (self::ord($char[$i]) & ~0x80) : 0;

      $code = ($code << 6) + $continuation;
    }

    return $code;
  }
1067
1068
  /**
1069
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1070
   *
1071 44
   * @param string $char <p>The input character</p>
1072
   * @param string $pfix [optional]
1073
   *
1074
   * @return string <p>The code point encoded as U+xxxx</p>
1075
   */
1076
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    // The literal NUL character reference is handled like an empty input.
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
1084
1085
  /**
1086 44
   * alias for "UTF8::chr_to_decimal()"
1087 44
   *
1088
   * @see UTF8::chr_to_decimal()
1089 44
   *
1090 44
   * @param string $chr
1091
   *
1092 44
   * @return int
1093 17
   */
1094 17
  public static function chr_to_int($chr)
  {
    // Pure alias: delegate and pass the result through untouched.
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1098 12
1099
  /**
1100 44
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1101 5
   *
1102 5
   * @param string $body     <p>The original string to be split.</p>
1103
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1104 44
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1105
   *
1106
   * @return string <p>The chunked string</p>
1107
   */
1108
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    // Let split() cut the text into pieces, using $chunklen as the piece length ...
    $chunks = self::split($body, $chunklen);

    // ... then join them with $end (implode() adds nothing after the last piece).
    return implode($end, $chunks);
  }
1112
1113
  /**
1114 4
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1115
   *
1116 4
   * @param string $str                     <p>The string to be sanitized.</p>
1117
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1118 4
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1119 1
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1120
   *                                        => "..."</p>
1121
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1122
   *                                        $normalize_whitespace</p>
1123 4
   *
1124
   * @return string <p>Clean UTF-8 encoded string.</p>
1125
   */
1126
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // Pattern taken from:
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // The repetition is capped at 100 because an unbounded quantifier
    // caused connection reset problems on larger strings.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // Only group 1 (the well-formed sequences) is kept; stray bytes matched by
    // the other two groups are replaced with nothing.
    $cleaned = preg_replace($utf8Pattern, '$1', $str);

    // Mandatory steps.
    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // Optional steps, each one enabled by its own flag.
    if (true === $normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if (true === $remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1161 1
1162
  /**
1163 1
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
1164 1
   *
1165 1
   * @param string $str <p>The input string.</p>
1166
   *
1167 1
   * @return string
1168 1
   */
1169
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // Step 1: repair simple ISO <-> UTF-8 mix-ups.
    $fixed = self::fix_simple_utf8($input);

    // Step 2: run clean() with these switches:
    //   - remove BOM:              yes
    //   - normalize whitespace:    yes
    //   - normalize MS Word chars: no
    //   - keep non-breaking space: yes
    // (clean() itself also strips non UTF-8 symbols, the diamond question mark
    //  and invisible characters such as "\0").
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
1189
1190
  /**
1191
   * Accepts a string or a array of strings and returns an array of Unicode code points.
1192
   *
1193
   * INFO: opposite to UTF8::string()
1194 1
   *
1195
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1196 1
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1197 1
   *                                    default, code points will be returned as integers.</p>
1198 1
   *
1199
   * @return array <p>The array of code points.</p>
1200 1
   */
1201
  public static function codepoints($arg, $u_style = false)
  {
    // A plain string is split first; an array is used as given.
    $chars = is_string($arg) ? self::split($arg) : $arg;

    // Map every element to its code point via UTF8::ord().
    $points = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // Caller asked for the "U+xxxx" notation: convert each integer via UTF8::int_to_hex().
    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $points);
  }
1227 1
1228
  /**
1229 11
   * Returns count of characters used in a string.
1230
   *
1231
   * @param string $str       <p>The input string.</p>
1232
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
1233 11
   *
1234
   * @return array <p>An associative array of Character as keys and
1235
   *               their count as values.</p>
1236 11
   */
1237
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // Split with a piece length of 1, then tally how often each piece occurs.
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1241
1242
  /**
1243 11
   * Converts a int-value into an UTF-8 character.
1244
   *
1245
   * @param int $int
1246 11
   *
1247 1
   * @return string
1248 1
   */
1249 1
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    // Build the numeric character reference and let the entity decoder resolve it.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1259 1
1260
  /**
1261
   * Encode a string with a new charset-encoding.
1262 2
   *
1263 1
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1264
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
1265
   *
1266 2
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1267 2
   * @param string $str      <p>The input string</p>
1268 2
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1269
   *                         /> otherwise we auto-detect the current string-encoding</p>
1270 2
   *
1271
   * @return string
1272 2
   */
1273 2
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // Nothing to do without input or without a target encoding.
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $sourceEncoding = self::str_detect_encoding($str);

    // No usable detection result => hand the input back untouched.
    if (!$sourceEncoding) {
      return $str;
    }

    // Without $force, a string that is already in the target encoding is left alone.
    $forced = ($force === true);
    if (!$forced && $sourceEncoding === $encoding) {
      return $str;
    }

    // Target UTF-8: delegate to to_utf8() when forced or when the source is one of the listed encodings.
    if (
        $encoding === 'UTF-8'
        &&
        (
            $forced
            || in_array($sourceEncoding, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // Target ISO-8859-1: delegate to to_iso8859() under the analogous condition.
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $forced
            || in_array($sourceEncoding, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // Every other target goes through mb_convert_encoding(); warn when mbstring is not supported.
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $sourceEncoding);

    // A falsy conversion result means: fall back to the original input.
    return $converted ? $converted : $str;
  }
1350
1351
  /**
1352
   * Reads entire file into a string.
1353
   *
1354
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1355
   *
1356
   * @link http://php.net/manual/en/function.file-get-contents.php
1357
   *
1358
   * @param string        $filename      <p>
1359
   *                                     Name of the file to read.
1360
   *                                     </p>
1361
   * @param int|null      $flags         [optional] <p>
1362 2
   *                                     Prior to PHP 6, this parameter is called
1363
   *                                     use_include_path and is a bool.
1364
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1365 2
   *                                     to trigger include path
1366 2
   *                                     search.
1367
   *                                     </p>
1368 2
   *                                     <p>
1369 2
   *                                     The value of flags can be any combination of
1370
   *                                     the following flags (with some restrictions), joined with the
1371
   *                                     binary OR (|)
1372
   *                                     operator.
1373 2
   *                                     </p>
1374 2
   *                                     <p>
1375
   *                                     <table>
1376 2
   *                                     Available flags
1377 2
   *                                     <tr valign="top">
1378
   *                                     <td>Flag</td>
1379 2
   *                                     <td>Description</td>
1380 1
   *                                     </tr>
1381 1
   *                                     <tr valign="top">
1382 2
   *                                     <td>
1383
   *                                     FILE_USE_INCLUDE_PATH
1384
   *                                     </td>
1385
   *                                     <td>
1386 2
   *                                     Search for filename in the include directory.
1387 1
   *                                     See include_path for more
1388
   *                                     information.
1389
   *                                     </td>
1390 1
   *                                     </tr>
1391 1
   *                                     <tr valign="top">
1392 1
   *                                     <td>
1393 1
   *                                     FILE_TEXT
1394
   *                                     </td>
1395 1
   *                                     <td>
1396
   *                                     As of PHP 6, the default encoding of the read
1397
   *                                     data is UTF-8. You can specify a different encoding by creating a
1398
   *                                     custom context or by changing the default using
1399
   *                                     stream_default_encoding. This flag cannot be
1400
   *                                     used with FILE_BINARY.
1401
   *                                     </td>
1402
   *                                     </tr>
1403
   *                                     <tr valign="top">
1404
   *                                     <td>
1405 1
   *                                     FILE_BINARY
1406
   *                                     </td>
1407 1
   *                                     <td>
1408
   *                                     With this flag, the file is read in binary mode. This is the default
1409
   *                                     setting and cannot be used with FILE_TEXT.
1410
   *                                     </td>
1411
   *                                     </tr>
1412
   *                                     </table>
1413
   *                                     </p>
1414
   * @param resource|null $context       [optional] <p>
1415
   *                                     A valid context resource created with
1416
   *                                     stream_context_create. If you don't need to use a
1417
   *                                     custom context, you can skip this parameter by &null;.
1418
   *                                     </p>
1419 9
   * @param int|null      $offset        [optional] <p>
1420
   *                                     The offset where the reading starts.
1421 9
   *                                     </p>
1422 9
   * @param int|null      $maxlen        [optional] <p>
1423 3
   *                                     Maximum length of data read. The default is to read until end
1424
   *                                     of file is reached.
1425 3
   *                                     </p>
1426 3
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1427 3
   *
1428 9
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1429 2
   *                                     or pdf, because they used non default utf-8 chars</p>
1430 2
   *
1431 2
   * @return string|false <p>The function returns the read data or false on failure.</p>
1432 2
   */
1433 9
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() returns false when the value cannot be filtered; an empty
    // path can never be read either (PHP 8 even throws a ValueError for it),
    // so report the failure the documented way.
    if ($filename === false || $filename === '') {
      return false;
    }

    // Apply the timeout via a default http-context, unless the caller brought a context.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares these parameters as bool / int. Passing the
    // null defaults through is deprecated since PHP 8.1, so do explicitly the
    // conversion that PHP's type juggling did implicitly (null => false / 0).
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    // $maxlen is forwarded only when it is a real integer, otherwise the file is read to its end.
    if (is_int($maxlen)) {
      $data = \file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = \file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    // WARNING: not suitable for binary content (e.g. images).
    if ($convertToUtf8 === true) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1468
1469
  /**
1470
   * Checks if a file starts with BOM (Byte Order Mark) character.
1471
   *
1472
   * @param string $file_path <p>Path to a valid file.</p>
1473
   *
1474
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1475
   */
1476
  public static function file_has_bom($file_path)
  {
    // Load the whole file with PHP's native reader and inspect its content.
    $contents = file_get_contents($file_path);

    return self::string_has_bom($contents);
  }
1480
1481
  /**
1482
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1483
   *
1484
   * @param mixed  $var
1485
   * @param int    $normalization_form
1486
   * @param string $leading_combining
1487
   *
1488
   * @return mixed
1489
   */
1490
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Containers: filter every element / property recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string passes through unchanged.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Strings that is_ascii() does not reject need no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Already normalized: "$n" is only a non-empty marker for the check below.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized string if there is one, otherwise re-encode the input as UTF-8.
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    if (
        $var[0] >= "\x80"
        &&
        isset($n[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1542 1
1543 1
  /**
1544 1
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1545 1
   *
1546
   * Gets a specific external variable by name and optionally filters it
1547
   *
1548 1
   * @link  http://php.net/manual/en/function.filter-input.php
1549
   *
1550
   * @param int    $type          <p>
1551
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1552
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1553
   *                              <b>INPUT_ENV</b>.
1554
   *                              </p>
1555
   * @param string $variable_name <p>
1556
   *                              Name of a variable to get.
1557
   *                              </p>
1558
   * @param int    $filter        [optional] <p>
1559 1
   *                              The ID of the filter to apply. The
1560
   *                              manual page lists the available filters.
1561 1
   *                              </p>
1562
   * @param mixed  $options       [optional] <p>
1563
   *                              Associative array of options or bitwise disjunction of flags. If filter
1564
   *                              accepts options, flags can be provided in "flags" field of array.
1565
   *                              </p>
1566
   *
1567
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1568
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1569
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1570
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1571
   * @since 5.2.0
1572
   */
1573 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only when the caller really supplied it.
    $value = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // Post-process the native result with UTF8::filter().
    return self::filter($value);
  }
1583
1584 7
  /**
1585 2
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1586
   *
1587
   * Gets external variables and optionally filters them
1588 7
   *
1589 1
   * @link  http://php.net/manual/en/function.filter-input-array.php
1590 1
   *
1591 1
   * @param int   $type       <p>
1592
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1593 7
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1594
   *                          <b>INPUT_ENV</b>.
1595
   *                          </p>
1596
   * @param mixed $definition [optional] <p>
1597
   *                          An array defining the arguments. A valid key is a string
1598
   *                          containing a variable name and a valid value is either a filter type, or an array
1599
   *                          optionally specifying the filter, flags and options. If the value is an
1600
   *                          array, valid keys are filter which specifies the
1601
   *                          filter type,
1602
   *                          flags which specifies any flags that apply to the
1603 1
   *                          filter, and options which specifies any options that
1604
   *                          apply to the filter. See the example below for a better understanding.
1605 1
   *                          </p>
1606
   *                          <p>
1607 1
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1608
   *                          input array are filtered by this filter.
1609
   *                          </p>
1610 1
   * @param bool  $add_empty  [optional] <p>
1611 1
   *                          Add missing keys as <b>NULL</b> to the return value.
1612
   *                          </p>
1613 1
   *
1614
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1615
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1616 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1617 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1618 1
   * fails.
1619 1
   * @since 5.2.0
1620 1
   */
1621 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With only $type given, the native function is called without the optional arguments.
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // Post-process the native result with UTF8::filter().
    return self::filter($values);
  }
1631
1632 1
  /**
1633
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1634 1
   *
1635
   * Filters a variable with a specified filter
1636
   *
1637
   * @link  http://php.net/manual/en/function.filter-var.php
1638 1
   *
1639
   * @param mixed $variable <p>
1640
   *                        Value to filter.
1641
   *                        </p>
1642
   * @param int   $filter   [optional] <p>
1643
   *                        The ID of the filter to apply. The
1644
   *                        manual page lists the available filters.
1645
   *                        </p>
1646
   * @param mixed $options  [optional] <p>
1647
   *                        Associative array of options or bitwise disjunction of flags. If filter
1648
   *                        accepts options, flags can be provided in "flags" field of array. For
1649
   *                        the "callback" filter, callable type should be passed. The
1650
   *                        callback must accept one argument, the value to be filtered, and return
1651
   *                        the value after filtering/sanitizing it.
1652
   *                        </p>
1653
   *                        <p>
1654 1
   *                        <code>
1655
   *                        // for filters that accept options, use this format
1656 1
   *                        $options = array(
1657 1
   *                        'options' => array(
1658
   *                        'default' => 3, // value to return if the filter fails
1659
   *                        // other options here
1660 1
   *                        'min_range' => 0
1661
   *                        ),
1662 1
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1663 1
   *                        );
1664 1
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1665 1
   *                        // for filter that only accept flags, you can pass them directly
1666 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1667 1
   *                        // for filter that only accept flags, you can also pass as an array
1668 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1669 1
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1670 1
   *                        // callback validate filter
1671 1
   *                        function foo($value)
1672 1
   *                        {
1673
   *                        // Expected format: Surname, GivenNames
1674
   *                        if (strpos($value, ", ") === false) return false;
1675
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1676
   *                        $empty = (empty($surname) || empty($givennames));
1677
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1678
   *                        if ($empty || $notstrings) {
1679
   *                        return false;
1680
   *                        } else {
1681
   *                        return $value;
1682
   *                        }
1683
   *                        }
1684
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1685
   *                        </code>
1686
   *                        </p>
1687
   *
1688
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1689
   * @since 5.2.0
1690
   */
1691 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only when the caller really supplied a third
    // argument; otherwise the native default for that parameter stays in effect.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // Post-process the native result with this class' own filter() helper.
    return self::filter($filtered);
  }
1701
1702
  /**
1703
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1704
   *
1705
   * Gets multiple variables and optionally filters them
1706
   *
1707
   * @link  http://php.net/manual/en/function.filter-var-array.php
1708
   *
1709
   * @param array $data       <p>
1710
   *                          An array with string keys containing the data to filter.
1711
   *                          </p>
1712
   * @param mixed $definition [optional] <p>
1713
   *                          An array defining the arguments. A valid key is a string
1714
   *                          containing a variable name and a valid value is either a
1715
   *                          filter type, or an
1716
   *                          array optionally specifying the filter, flags and options.
1717
   *                          If the value is an array, valid keys are filter
1718
   *                          which specifies the filter type,
1719
   *                          flags which specifies any flags that apply to the
1720
   *                          filter, and options which specifies any options that
1721
   *                          apply to the filter. See the example below for a better understanding.
1722
   *                          </p>
1723
   *                          <p>
1724
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1725
   *                          input array are filtered by this filter.
1726
   *                          </p>
1727
   * @param bool  $add_empty  [optional] <p>
1728
   *                          Add missing keys as <b>NULL</b> to the return value.
1729
   *                          </p>
1730
   *
1731
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1732
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1733
   * the variable is not set.
1734
   * @since 5.2.0
1735
   */
1736 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // Called with a single argument: let the native function use its own defaults.
    // Otherwise forward the definition and the "add empty" switch unchanged.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // Post-process the native result with this class' own filter() helper.
    return self::filter($filtered);
  }
1746
1747
  /**
1748
   * Check if the number of unicode characters is not more than the specified integer.
1749
   *
1750
   * @param string $str      The original string to be checked.
1751
   * @param int    $box_size The size in number of chars to be checked against string.
1752 1
   *
1753
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1754 1
   */
1755 1
  public static function fits_inside($str, $box_size)
  {
    // length as reported by this class' own strlen()
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1759
1760
  /**
1761
   * Try to fix simple broken UTF-8 strings.
1762
   *
1763
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1764
   *
1765
   * If you received a UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
1766
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1767
   * See: http://en.wikipedia.org/wiki/Windows-1252
1768
   *
1769
   * @param string $str <p>The input string</p>
1770
   *
1771
   * @return string
1772 1
   */
1773 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $input = (string)$str;

    // nothing to repair in an empty string
    if ($input === '') {
      return '';
    }

    // The search / replace lists are derived from self::$brokenUtf8ToUtf8 on the first call
    // and reused by every later call.
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$brokenUtf8ToUtf8);
      $REPLACE_CACHE = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($SEARCH_CACHE, $REPLACE_CACHE, $input);
  }
1792 1
1793 1
  /**
1794
   * Fix a double (or multiple) encoded UTF8 string.
1795
   *
1796 1
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1797
   *
1798
   * @return mixed
1799
   */
1800
  public static function fix_utf8($str)
  {
    // Arrays are processed element by element (recursively); keys and their order are kept.
    if (is_array($str)) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // An empty string needs no work at all.
    if ($str === '') {
      return $str;
    }

    // Decode and re-encode until a pass no longer changes the value.
    do {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    } while ($previous !== $str);

    return $str;
  }
1824
1825
  /**
1826 2
   * Get the text direction of a specific character.
1827
   *
1828
   * @param string $char
1829 2
   *
1830
   * @return string <p>'RTL' or 'LTR'</p>
1831 2
   */
1832 2
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      // direction classes as returned by "IntlChar::charDirection()"
      $ltrClasses = array(0, 11, 12, 20);
      $rtlClasses = array(1, 13, 14, 15, 21);

      $directionClass = \IntlChar::charDirection($char);

      if (in_array($directionClass, $ltrClasses, true)) {
        return 'LTR';
      }

      if (in_array($directionClass, $rtlClasses, true)) {
        return 'RTL';
      }

      // any other class falls through to the code point tables below
    }

    $c = self::chr_to_decimal($char);

    // everything outside of [0x5be, 0x10b7f] is treated as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // single right-to-left code points (matched strictly, i.e. by value and type)
    static $RTL_CODE_POINTS = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // inclusive right-to-left code point ranges: array(first, last)
    static $RTL_RANGES = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (in_array($c, $RTL_CODE_POINTS, true)) {
      return 'RTL';
    }

    foreach ($RTL_RANGES as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1944 9
1945 9
  /**
1946 9
   * Get data from "/data/*.php"
1947 6
   *
1948
   * @param string $file
1949
   *
1950 9
   * @return bool|string|array|int <p>Will return false on error.</p>
1951 2
   */
1952 2
  private static function getData($file)
  {
    // Build the path of the data file that lives next to this class: "<dir>/data/<name>.php".
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // The data file's own return value is handed back to the caller.
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1962
1963 9
  /**
1964
   * alias for "UTF8::string_has_bom()"
1965 9
   *
1966 9
   * @see UTF8::string_has_bom()
1967
   *
1968 7
   * @param string $str
1969
   *
1970 7
   * @return bool
1971 6
   *
1972
   * @deprecated
1973 4
   */
1974
  public static function hasBom($str)
  {
    // deprecated alias: delegates straight to string_has_bom()
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1978
1979
  /**
1980 9
   * Converts a hexadecimal-value into an UTF-8 character.
1981 9
   *
1982 9
   * @param string $hexdec <p>The hexadecimal value.</p>
1983
   *
1984 9
   * @return string|false <p>One single UTF-8 character.</p>
1985
   */
1986 9
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal notation -> decimal code point -> character
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1990
1991
  /**
1992
   * Converts hexadecimal U+xxxx code point representation to integer.
1993
   *
1994
   * INFO: opposite to UTF8::int_to_hex()
1995
   *
1996
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
1997
   *
1998
   * @return int|false <p>The code point, or false on failure.</p>
1999
   */
2000
  public static function hex_to_int($hexdec)
  {
    // empty input ('' / null / 0 / '0') can never be a valid code point representation
    if (!$hexdec) {
      return false;
    }

    // Accepted forms: "\uXXXX", "U+XXXX" or the bare digits, with 4 to 6 hexadecimal digits.
    // Only real hex digits (0-9, a-f) are allowed: with a wider class such as [a-z0-9] an input
    // like "U+zzzz" would match and intval() would silently turn it into 0 instead of failing.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2012
2013
  /**
2014
   * alias for "UTF8::html_entity_decode()"
2015
   *
2016
   * @see UTF8::html_entity_decode()
2017
   *
2018
   * @param string $str
2019
   * @param int    $flags
2020
   * @param string $encoding
2021
   *
2022
   * @return string
2023
   */
2024
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // alias: all arguments are handed over to html_entity_decode() unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2028
2029
  /**
2030
   * Converts a UTF-8 string to a series of HTML numbered entities.
2031
   *
2032
   * INFO: opposite to UTF8::html_decode()
2033
   *
2034
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2035
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2036
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2037
   *
2038
   * @return string <p>HTML numbered entities.</p>
2039
   */
2040
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars only code points from 0x80 upwards are converted,
      // otherwise conversion starts at 0x00.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map is a flat list of 4-tuples: start, end, offset, mask.
      // It must contain a multiple of four elements; a stray fifth element makes PHP 8
      // throw a ValueError (older versions silently ignored it).
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without "mbstring": encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2078
2079
  /**
2080
   * UTF-8 version of html_entity_decode()
2081
   *
2082
   * The reason we are not using html_entity_decode() by itself is because
2083
   * while it is not technically correct to leave out the semicolon
2084
   * at the end of an entity most browsers will still interpret the entity
2085
   * correctly. html_entity_decode() does not convert entities without
2086
   * semicolons, so we are left with our own little solution here. Bummer.
2087
   *
2088
   * Convert all HTML entities to their applicable characters
2089
   *
2090
   * INFO: opposite to UTF8::html_encode()
2091
   *
2092
   * @link http://php.net/manual/en/function.html-entity-decode.php
2093
   *
2094 2
   * @param string $str      <p>
2095
   *                         The input string.
2096 2
   *                         </p>
2097 1
   * @param int    $flags    [optional] <p>
2098 1
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2099
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2100 2
   *                         <table>
2101
   *                         Available <i>flags</i> constants
2102 2
   *                         <tr valign="top">
2103 1
   *                         <td>Constant Name</td>
2104
   *                         <td>Description</td>
2105
   *                         </tr>
2106 2
   *                         <tr valign="top">
2107 2
   *                         <td><b>ENT_COMPAT</b></td>
2108 2
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2109 2
   *                         </tr>
2110 2
   *                         <tr valign="top">
2111 1
   *                         <td><b>ENT_QUOTES</b></td>
2112
   *                         <td>Will convert both double and single quotes.</td>
2113 1
   *                         </tr>
2114 1
   *                         <tr valign="top">
2115 1
   *                         <td><b>ENT_NOQUOTES</b></td>
2116 1
   *                         <td>Will leave both double and single quotes unconverted.</td>
2117 1
   *                         </tr>
2118 2
   *                         <tr valign="top">
2119
   *                         <td><b>ENT_HTML401</b></td>
2120 2
   *                         <td>
2121
   *                         Handle code as HTML 4.01.
2122
   *                         </td>
2123
   *                         </tr>
2124
   *                         <tr valign="top">
2125
   *                         <td><b>ENT_XML1</b></td>
2126
   *                         <td>
2127
   *                         Handle code as XML 1.
2128
   *                         </td>
2129
   *                         </tr>
2130
   *                         <tr valign="top">
2131
   *                         <td><b>ENT_XHTML</b></td>
2132
   *                         <td>
2133
   *                         Handle code as XHTML.
2134
   *                         </td>
2135
   *                         </tr>
2136
   *                         <tr valign="top">
2137
   *                         <td><b>ENT_HTML5</b></td>
2138
   *                         <td>
2139
   *                         Handle code as HTML 5.
2140
   *                         </td>
2141
   *                         </tr>
2142
   *                         </table>
2143
   *                         </p>
2144
   * @param string $encoding [optional] <p>Encoding to use.</p>
2145
   *
2146
   * @return string <p>The decoded string.</p>
2147
   */
2148
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // strings shorter than four bytes are returned untouched (examples: "&;" or "&x;")
    if (!isset($str[3])) {
      return $str;
    }

    // fast exit: no ampersand at all ...
    if (strpos($str, '&') === false) {
      return $str;
    }

    // ... or neither a numeric entity prefix nor a semicolon
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      $flags = ENT_QUOTES;

      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      }
    }

    // Converts one decimal entity ("&#NN;") via mbstring; entities that turn into a quote
    // character are left as they are in this step.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // Repeat until a full pass leaves the string unchanged (nested / double encoded entities).
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // add the missing semicolon to numeric entities, then decode numeric & UTF16 two byte entities
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2213
2214
  /**
2215
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2216
   *
2217
   * @link http://php.net/manual/en/function.htmlentities.php
2218
   *
2219
   * @param string $str           <p>
2220
   *                              The input string.
2221
   *                              </p>
2222
   * @param int    $flags         [optional] <p>
2223
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2224
   *                              invalid code unit sequences and the used document type. The default is
2225
   *                              ENT_COMPAT | ENT_HTML401.
2226
   *                              <table>
2227
   *                              Available <i>flags</i> constants
2228
   *                              <tr valign="top">
2229
   *                              <td>Constant Name</td>
2230
   *                              <td>Description</td>
2231
   *                              </tr>
2232 1
   *                              <tr valign="top">
2233
   *                              <td><b>ENT_COMPAT</b></td>
2234 1
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2235
   *                              </tr>
2236
   *                              <tr valign="top">
2237
   *                              <td><b>ENT_QUOTES</b></td>
2238 1
   *                              <td>Will convert both double and single quotes.</td>
2239
   *                              </tr>
2240
   *                              <tr valign="top">
2241
   *                              <td><b>ENT_NOQUOTES</b></td>
2242
   *                              <td>Will leave both double and single quotes unconverted.</td>
2243
   *                              </tr>
2244
   *                              <tr valign="top">
2245
   *                              <td><b>ENT_IGNORE</b></td>
2246 1
   *                              <td>
2247
   *                              Silently discard invalid code unit sequences instead of returning
2248 1
   *                              an empty string. Using this flag is discouraged as it
2249
   *                              may have security implications.
2250
   *                              </td>
2251
   *                              </tr>
2252
   *                              <tr valign="top">
2253
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2254
   *                              <td>
2255
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2256
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2257
   *                              </td>
2258
   *                              </tr>
2259
   *                              <tr valign="top">
2260
   *                              <td><b>ENT_DISALLOWED</b></td>
2261 3
   *                              <td>
2262
   *                              Replace invalid code points for the given document type with a
2263 3
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2264 3
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2265
   *                              instance, to ensure the well-formedness of XML documents with
2266 3
   *                              embedded external content.
2267
   *                              </td>
2268 3
   *                              </tr>
2269
   *                              <tr valign="top">
2270
   *                              <td><b>ENT_HTML401</b></td>
2271
   *                              <td>
2272
   *                              Handle code as HTML 4.01.
2273
   *                              </td>
2274
   *                              </tr>
2275
   *                              <tr valign="top">
2276
   *                              <td><b>ENT_XML1</b></td>
2277
   *                              <td>
2278
   *                              Handle code as XML 1.
2279 1
   *                              </td>
2280
   *                              </tr>
2281 1
   *                              <tr valign="top">
2282
   *                              <td><b>ENT_XHTML</b></td>
2283
   *                              <td>
2284
   *                              Handle code as XHTML.
2285
   *                              </td>
2286
   *                              </tr>
2287
   *                              <tr valign="top">
2288
   *                              <td><b>ENT_HTML5</b></td>
2289 2
   *                              <td>
2290
   *                              Handle code as HTML 5.
2291 2
   *                              </td>
2292
   *                              </tr>
2293
   *                              </table>
2294
   *                              </p>
2295
   * @param string $encoding      [optional] <p>
2296
   *                              Like <b>htmlspecialchars</b>,
2297
   *                              <b>htmlentities</b> takes an optional third argument
2298
   *                              <i>encoding</i> which defines encoding used in
2299
   *                              conversion.
2300
   *                              Although this argument is technically optional, you are highly
2301
   *                              encouraged to specify the correct value for your code.
2302
   *                              </p>
2303 2
   * @param bool   $double_encode [optional] <p>
2304
   *                              When <i>double_encode</i> is turned off PHP will not
2305 2
   *                              encode existing html entities. The default is to convert everything.
2306
   *                              </p>
2307
   *
2308
   *
2309
   * @return string the encoded string.
2310
   * </p>
2311
   * <p>
2312
   * If the input <i>string</i> contains an invalid code unit
2313
   * sequence within the given <i>encoding</i> an empty string
2314
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2315
   * <b>ENT_SUBSTITUTE</b> flags are set.
2316
   */
2317 1
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // Only UTF-8 output gets the extra pass below; other encodings are returned as-is.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes that is still present after the
    // native call, together with its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $counter => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $counter);

      // access() is reported to be able to return false on failure: never use such a result as
      // array key / search needle and never pass it on to html_encode().
      if ($char === false || $char === '') {
        continue;
      }

      if (!isset($replacements[$char])) {
        $search[$char] = $char;
        $replacements[$char] = self::html_encode($char);
      }
    }

    return str_replace($search, $replacements, $str);
  }
2345
2346
  /**
2347
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2348
   *
2349
   * INFO: Take a look at "UTF8::htmlentities()"
2350
   *
2351
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2352
   *
2353
   * @param string $str           <p>
2354
   *                              The string being converted.
2355
   *                              </p>
2356
   * @param int    $flags         [optional] <p>
2357
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2358
   *                              invalid code unit sequences and the used document type. The default is
2359 1
   *                              ENT_COMPAT | ENT_HTML401.
2360
   *                              <table>
2361 1
   *                              Available <i>flags</i> constants
2362
   *                              <tr valign="top">
2363
   *                              <td>Constant Name</td>
2364
   *                              <td>Description</td>
2365
   *                              </tr>
2366
   *                              <tr valign="top">
2367
   *                              <td><b>ENT_COMPAT</b></td>
2368
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2369
   *                              </tr>
2370
   *                              <tr valign="top">
2371
   *                              <td><b>ENT_QUOTES</b></td>
2372
   *                              <td>Will convert both double and single quotes.</td>
2373
   *                              </tr>
2374
   *                              <tr valign="top">
2375
   *                              <td><b>ENT_NOQUOTES</b></td>
2376
   *                              <td>Will leave both double and single quotes unconverted.</td>
2377
   *                              </tr>
2378
   *                              <tr valign="top">
2379
   *                              <td><b>ENT_IGNORE</b></td>
2380
   *                              <td>
2381
   *                              Silently discard invalid code unit sequences instead of returning
2382
   *                              an empty string. Using this flag is discouraged as it
2383
   *                              may have security implications.
2384
   *                              </td>
2385
   *                              </tr>
2386
   *                              <tr valign="top">
2387 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2388
   *                              <td>
2389 1
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2390
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2391
   *                              </td>
2392
   *                              </tr>
2393
   *                              <tr valign="top">
2394
   *                              <td><b>ENT_DISALLOWED</b></td>
2395
   *                              <td>
2396
   *                              Replace invalid code points for the given document type with a
2397
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2398
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2399
   *                              instance, to ensure the well-formedness of XML documents with
2400
   *                              embedded external content.
2401 1
   *                              </td>
2402
   *                              </tr>
2403 1
   *                              <tr valign="top">
2404
   *                              <td><b>ENT_HTML401</b></td>
2405
   *                              <td>
2406
   *                              Handle code as HTML 4.01.
2407
   *                              </td>
2408
   *                              </tr>
2409
   *                              <tr valign="top">
2410
   *                              <td><b>ENT_XML1</b></td>
2411
   *                              <td>
2412
   *                              Handle code as XML 1.
2413
   *                              </td>
2414
   *                              </tr>
2415
   *                              <tr valign="top">
2416 16
   *                              <td><b>ENT_XHTML</b></td>
2417
   *                              <td>
2418 16
   *                              Handle code as XHTML.
2419
   *                              </td>
2420
   *                              </tr>
2421
   *                              <tr valign="top">
2422
   *                              <td><b>ENT_HTML5</b></td>
2423
   *                              <td>
2424
   *                              Handle code as HTML 5.
2425
   *                              </td>
2426
   *                              </tr>
2427
   *                              </table>
2428
   *                              </p>
2429
   * @param string $encoding      [optional] <p>
2430
   *                              Defines encoding used in conversion.
2431 28
   *                              </p>
2432
   *                              <p>
2433 28
   *                              For the purposes of this function, the encodings
2434
   *                              ISO-8859-1, ISO-8859-15,
2435 28
   *                              UTF-8, cp866,
2436 5
   *                              cp1251, cp1252, and
2437
   *                              KOI8-R are effectively equivalent, provided the
2438
   *                              <i>string</i> itself is valid for the encoding, as
2439 28
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2440
   *                              the same positions in all of these encodings.
2441
   *                              </p>
2442
   * @param bool   $double_encode [optional] <p>
2443
   *                              When <i>double_encode</i> is turned off PHP will not
2444
   *                              encode existing html entities, the default is to convert everything.
2445
   *                              </p>
2446
   *
2447
   * @return string The converted string.
2448
   * </p>
2449 1
   * <p>
2450
   * If the input <i>string</i> contains an invalid code unit
2451 1
   * sequence within the given <i>encoding</i> an empty string
2452
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2453 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2454 1
   */
2455
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // "UTF-8" is passed through untouched; any other label is normalized first.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    // delegate the actual conversion to the native function
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2463
2464
  /**
   * Checks whether iconv is available on the server.
   *
   * <p>
   * Side effect: when "Bootup::is_php('5.6')" is not truthy and
   * "iconv_set_encoding()" exists, the iconv input-, output- and
   * internal-encoding are set to "UTF-8".
   * </p>
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $return = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Guard with function_exists(): calling "iconv_set_encoding()" when it is
    // not defined would abort with a fatal error instead of reporting false.
    if (!Bootup::is_php('5.6') && function_exists('iconv_set_encoding')) {
      // INFO: "iconv_set_encoding" is deprecated since PHP 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2484
2485 15
  /**
   * Alias that forwards to "UTF8::decimal_to_chr()".
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param int $int <p>Passed through unchanged.</p>
   *
   * @return string <p>Whatever "UTF8::decimal_to_chr()" returns.</p>
   */
  public static function int_to_chr($int)
  {
    // pure delegation, no logic of its own
    return self::decimal_to_chr($int);
  }
2498
2499
  /**
   * Formats an integer as a hexadecimal code point string, e.g. "U+0041".
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to convert; it must consist of decimal digits only.</p>
   * @param string $pfix [optional] <p>Prefix that is put in front of the hex digits.</p>
   *
   * @return string <p>The prefixed, lower-case hex value (at least four digits), or an empty string on failure.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // anything that is not a plain run of decimal digits is rejected
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // left-pad with zeros up to four digits; longer values stay as they are
    $digits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2521
2522
  /**
   * Checks whether the "IntlChar" class can be used on the server.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    // the version check comes first, so class_exists() is only reached when it passes
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2531
2532
  /**
   * Checks whether the "intl" extension is loaded on the server.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean
    return extension_loaded('intl');
  }
2541
2542
  /**
   * Deprecated alias that forwards to "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return boolean <p>Whatever "UTF8::is_ascii()" returns.</p>
   *
   * @deprecated
   */
  public static function isAscii($str)
  {
    // pure delegation, no logic of its own
    return self::is_ascii($str);
  }
2557
2558
  /**
   * Deprecated alias that forwards to "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever "UTF8::is_base64()" returns.</p>
   *
   * @deprecated
   */
  public static function isBase64($str)
  {
    // pure delegation, no logic of its own
    return self::is_base64($str);
  }
2573 1
2574 1
  /**
   * Deprecated alias that forwards to "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever "UTF8::is_binary()" returns.</p>
   *
   * @deprecated
   */
  public static function isBinary($str)
  {
    // pure delegation, no logic of its own
    return self::is_binary($str);
  }
2589
2590
  /**
   * Deprecated alias that forwards to "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>Passed through unchanged.</p>
   *
   * @return boolean <p>Whatever "UTF8::is_bom()" returns.</p>
   *
   * @deprecated
   */
  public static function isBom($utf8_chr)
  {
    // pure delegation, no logic of its own
    return self::is_bom($utf8_chr);
  }
2605 4
2606 4
  /**
   * Deprecated alias that forwards to "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return boolean <p>Whatever "UTF8::is_html()" returns.</p>
   *
   * @deprecated
   */
  public static function isHtml($str)
  {
    // pure delegation, no logic of its own
    return self::is_html($str);
  }
2621 4
2622 4
  /**
   * Deprecated alias that forwards to "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever "UTF8::is_json()" returns.</p>
   *
   * @deprecated
   */
  public static function isJson($str)
  {
    // pure delegation, no logic of its own
    return self::is_json($str);
  }
2637 3
2638
  /**
   * Deprecated alias that forwards to "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return int|false <p>false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated
   */
  public static function isUtf16($str)
  {
    // pure delegation, no logic of its own
    return self::is_utf16($str);
  }
2653 3
2654
  /**
   * Deprecated alias that forwards to "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return int|false <p>false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated
   */
  public static function isUtf32($str)
  {
    // pure delegation, no logic of its own
    return self::is_utf32($str);
  }
2669 1
2670 3
  /**
   * Deprecated alias that forwards to "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>Passed through unchanged.</p>
   * @param bool   $strict <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever "UTF8::is_utf8()" returns.</p>
   *
   * @deprecated
   */
  public static function isUtf8($str, $strict = false)
  {
    // pure delegation, no logic of its own
    return self::is_utf8($str, $strict);
  }
2686 3
2687 3
  /**
   * Tells whether a string consists of 7 bit ASCII bytes only.
   *
   * @param string $str <p>The string to check (it is cast to string first).</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if no byte in the range 0x80-0xFF was found (an empty string counts as ASCII)<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // nothing to inspect
    if ($str === '') {
      return true;
    }

    // a single high-bit byte is enough to rule out ASCII
    return preg_match('/[\x80-\xFF]/', $str) !== 1;
  }
2707
2708
  /**
   * Tells whether a string is base64 encoded.
   *
   * <p>The check is a round trip: the input is strictly decoded and encoded
   * again, and the result has to be identical to the input.</p>
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded; an empty string is never base64.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // strict mode: characters outside the base64 alphabet make the decode fail
    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2729
2730 41
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * <p>
   * The input is reported as binary when it consists of the characters
   * "0" and "1" only, or when it contains at least one NUL byte.
   * </p>
   *
   * @param mixed $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    $testLength = strlen($input);

    // a string made of "0" and "1" only
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // at least one NUL byte
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // NOTE(review): this counts the literal four-character string "^ -~", so
    // the ratio can never exceed 0.25 and this branch never fires; it looks
    // like a character class was intended here - confirm before changing.
    if ($testLength && substr_count($input, '^ -~') / $testLength > 0.3) {
      return true;
    }

    return false;
  }
2754 36
2755 41
  /**
   * Check if the file is binary.
   *
   * <p>
   * Up to 512 bytes are read from the start of the file and handed to
   * "UTF8::is_binary()". A file that cannot be opened or read is treated
   * like an empty string.
   * </p>
   *
   * @param string $file <p>Path of the file to inspect.</p>
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so the
      // handle has to be checked before it is read from.
      if ($fp !== false) {
        $data = fread($fp, 512);
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    }

    // release the handle on every path on which it was opened
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2774
2775
  /**
   * Checks if the given string is identical to one of the known "Byte Order Mark" strings.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // the BOM strings are the keys of self::$bom; the comparison is strict
    return in_array($str, array_keys(self::$bom), true);
  }
2794
2795
  /**
   * Check if the string contains anything that looks like an html-tag, e.g. "<b>" or "</p>".
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return boolean <p><strong>true</strong> if at least one tag was found, <strong>false</strong> otherwise.</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // opening / closing / self-closing tag, optionally with attributes
    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    return preg_match($tagPattern, $str) === 1;
  }
2821
2822 33
  /**
   * Try to check if "$str" is a json-string.
   *
   * <p>
   * The string is decoded via "UTF8::json_decode()"; it counts as JSON when
   * the result is an object or an array and no JSON error was recorded.
   * Scalar documents (e.g. "123") are therefore still reported as false.
   * </p>
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // A top-level JSON array ("[1,2]") decodes to a PHP array, not to an
    // object, so both container types have to be accepted here.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
2847
2848
  /**
2849
   * Check if the string is UTF-16.
2850
   *
2851
   * @param string $str <p>The input string.</p>
2852
   *
2853
   * @return int|false <p>
2854
   *                   <strong>false</strong> if is't not UTF-16,<br />
2855
   *                   <strong>1</strong> for UTF-16LE,<br />
2856
   *                   <strong>2</strong> for UTF-16BE.
2857
   *                   </p>
2858
   */
2859 View Code Duplication
  public static function is_utf16($str)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2860
  {
2861
    $str = self::remove_bom($str);
2862
2863
    if (self::is_binary($str)) {
2864
2865
      $maybeUTF16LE = 0;
2866
      $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
2867
      if ($test) {
2868
        $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
2869
        $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
2870
        if ($test3 === $test) {
2871
          $strChars = self::count_chars($str, true);
2872
          foreach (self::count_chars($test3, true) as $test3char => $test3charEmpty) {
2873
            if (in_array($test3char, $strChars, true) === true) {
2874
              $maybeUTF16LE++;
2875
            }
2876
          }
2877
        }
2878
      }
2879
2880
      $maybeUTF16BE = 0;
2881
      $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
2882
      if ($test) {
2883 2
        $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
2884
        $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
2885 2
        if ($test3 === $test) {
2886
          $strChars = self::count_chars($str, true);
2887 2
          foreach (self::count_chars($test3, true) as $test3char => $test3charEmpty) {
2888 2
            if (in_array($test3char, $strChars, true) === true) {
2889 2
              $maybeUTF16BE++;
2890
            }
2891
          }
2892
        }
2893 2
      }
2894
2895
      if ($maybeUTF16BE !== $maybeUTF16LE) {
2896
        if ($maybeUTF16LE > $maybeUTF16BE) {
2897
          return 1;
2898
        } else {
2899
          return 2;
2900
        }
2901
      }
2902
2903
    }
2904
2905
    return false;
2906
  }
2907
2908
  /**
   * Check if the string is UTF-32.
   *
   * <p>
   * Heuristic: after the BOM was removed and only if "UTF8::is_binary()"
   * accepts the input, the string is round-tripped through UTF-32LE and
   * UTF-32BE and a score is counted for each byte order; the higher score
   * wins, a tie means "not UTF-32".
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    // one score per candidate byte order, evaluated LE first, then BE
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $scores[$candidate] = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // the candidate only scores if a full round trip is stable
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
        if (in_array($roundTripChar, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-32LE'] > $scores['UTF-32BE']) {
      return 1;
    }

    if ($scores['UTF-32BE'] > $scores['UTF-32LE']) {
      return 2;
    }

    // tie: no decision possible
    return false;
  }
2967 24
2968
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * <p>
   * When "UTF8::pcre_utf8_support()" reports true, the check is done with a
   * "/u" regex; otherwise a byte-wise state machine validates the string.
   * </p>
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked (it is cast to string first).</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p>An empty string is reported as valid.</p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // The regex shortcut needs the "/u" modifier, so it may only be taken
    // when PCRE UTF-8 support IS available (this condition used to be inverted).
    if (self::pcre_utf8_support() === true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    // Fallback without PCRE UTF-8 support: validate byte by byte.

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet
      // sequence
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      // End of the multi-octet sequence. mUcs4 now contains the final
      // Unicode code point to be output
      if (0 === --$mState) {
        // Check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // initialize UTF8 cache
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    return true;
  }
3112 4
3113 4
  /**
3114
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3115 4
   * Decodes a JSON string
3116
   *
3117 4
   * @link http://php.net/manual/en/function.json-decode.php
3118
   *
3119
   * @param string $json    <p>
3120
   *                        The <i>json</i> string being decoded.
3121
   *                        </p>
3122
   *                        <p>
3123
   *                        This function only works with UTF-8 encoded strings.
3124
   *                        </p>
3125
   *                        <p>PHP implements a superset of
3126
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3127 13
   *                        only supports these values when they are nested inside an array or an object.
3128
   *                        </p>
3129 13
   * @param bool   $assoc   [optional] <p>
3130 13
   *                        When <b>TRUE</b>, returned objects will be converted into
3131
   *                        associative arrays.
3132 13
   *                        </p>
3133 1
   * @param int    $depth   [optional] <p>
3134 1
   *                        User specified recursion depth.
3135 1
   *                        </p>
3136
   * @param int    $options [optional] <p>
3137 13
   *                        Bitmask of JSON decode options. Currently only
3138
   *                        <b>JSON_BIGINT_AS_STRING</b>
3139
   *                        is supported (default is to cast large integers as floats)
3140
   *                        </p>
3141
   *
3142
   * @return mixed the value encoded in <i>json</i> in appropriate
3143
   * PHP type. Values true, false and
3144
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3145
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3146
   * <i>json</i> cannot be decoded or if the encoded
3147
   * data is deeper than the recursion limit.
3148
   */
3149
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // the input goes through UTF8::filter() before it is decoded
    $filtered = self::filter($json);

    // "$options" is only handed to the native function when
    // Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3161 2
3162
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value, after the value was passed through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   *                       <p>
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>PHP implements a superset of
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                       only supports these values when they are nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on
   *                       the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded to the native function when "Bootup::is_php('5.5')" is truthy.
   *                       </p>
   *
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // without the 5.5 check passing, the native call is made without "$depth"
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3210
3211
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string; an empty string for empty input.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // UTF8::substr() can return false, so both parts are cast before they are used as strings
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3222
3223
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped; when omitted (or empty),
   *                      Unicode separator (\pZ) and "other" (\pC) characters are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP but it is a valid character list, so it must not
    // fall back to the default whitespace stripping.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3246
3247
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point; an empty string if there is no character at
   *                all.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    // presumably a list of integer code points - TODO confirm against UTF8::codepoints()
    $codePoints = self::codepoints($arg);

    // the native max() warns (PHP < 8) or throws (PHP >= 8) on an empty array
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3262 4
3263
  /**
   * Determines the largest byte length of any single character in the given string,
   * based on the list returned by "UTF8::chr_size_list()".
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3280
3281
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the internal mbstring encoding is set to "UTF-8".
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3296
3297
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point; an empty string if there is no character at
   *                all.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    // presumably a list of integer code points - TODO confirm against UTF8::codepoints()
    $codePoints = self::codepoints($arg);

    // the native min() warns (PHP < 8) or throws (PHP >= 8) on an empty array
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3312
3313
  /**
   * Alias for "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   *
   * @return string|false <p>Whatever "UTF8::normalize_encoding()" returns for the given name.</p>
   *
   * @deprecated use "UTF8::normalize_encoding()" instead
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3328 1
3329
  /**
   * Normalize the encoding-"name" input.
   *
   * "UTF-8" and every name listed in the internal iconv-encoding list are returned unchanged.
   * Everything else is upper-cased and - if a known alias matches - mapped to its canonical name.
   * Results are memoized per original input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.; <strong>false</strong> if the given
   *                      encoding is empty.</p>
   */
  public static function normalize_encoding($encoding)
  {
    // memoized results, keyed by the original (non-normalized) input
    static $resultCache = array();

    // alias (upper-case, special chars removed) => canonical encoding name
    static $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!$encoding) {
      return false;
    }

    // fast paths: already a canonical / known name
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($resultCache[$encoding])) {
      return $resultCache[$encoding];
    }

    $upperCased = strtoupper($encoding);
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    // unknown names fall back to the upper-cased input
    $normalized = !empty($aliases[$aliasKey]) ? $aliases[$aliasKey] : $upperCased;

    $resultCache[$encoding] = $normalized;

    return $normalized;
  }
3385 10
3386
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the internal MS-Word table is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string <p>The normalized string; an empty string for empty input.</p>
   */
  public static function normalize_msword($str)
  {
    // the search / replace lists are derived from the table only once per request
    static $searchList = null;
    static $replaceList = null;

    $input = (string)$str;

    if (!isset($input[0])) {
      return '';
    }

    if ($searchList === null) {
      $searchList = array_keys(self::$utf8MSWord);
      $replaceList = array_values(self::$utf8MSWord);
    }

    return str_replace($searchList, $replaceList, $input);
  }
3412
3413
  /**
   * Normalize the whitespace.
   *
   * Every character of the internal whitespace table is replaced by a plain space and
   * (by default) the bidirectional text control characters are removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string <p>The normalized string; an empty string for empty input.</p>
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key must be derived from the very same strict check that decides
    // how the table is built. Otherwise a truthy non-bool (e.g. 1) would store the
    // full table under the key that is later used for "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $whitespaceTable = self::$whitespaceTable;

      if ($keepNbsp === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3459 45
3460
  /**
   * Format a number with grouped thousands.
   *
   * The native "number_format()" only honours the first byte of each separator before PHP 5.4,
   * so on those versions multi-byte separators are inserted via a placeholder replacement.
   *
   * @param float  $number        <p>The number being formatted.</p>
   * @param int    $decimals      [optional] <p>The number of decimal points.</p>
   * @param string $dec_point     [optional] <p>The separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>The thousands separator.</p>
   *
   * @return string <p>The formatted number.</p>
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The workaround is needed as soon as ONE of the separators is longer than a single byte,
    // and only where the native function cannot handle that itself (PHP < 5.4).
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() with an array replaces in one pass, so a separator that itself
      // contains "." or "," is never replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3497
3498
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8; any other value is normalized and the
   *                              character is converted to UTF-8 first.</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 for empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // "0" is falsy but still a valid character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // prefer "IntlChar", but fall through to the manual decoding on a falsy result
    if (self::$support['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($chr);
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    // static cache for the manual decoding below
    static $codePointCache = array();
    if (isset($codePointCache[$chr]) === true) {
      return $codePointCache[$chr];
    }

    // at most the first four bytes are inspected; unpack('C*') yields a 1-based list
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', substr($chr, 0, 4));
    $leadByte = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $leadByte && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $leadByte && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $leadByte && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($leadByte - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or an incomplete sequence): the lead byte itself
      $codePoint = $leadByte;
    }

    $codePointCache[$chr] = $codePoint;

    return $codePoint;
  }
3558
3559
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope;
   *          the parsed values are only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Run the string through "UTF8::clean()" before parsing.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    $input = $str;
    if ($cleanUtf8 === true) {
      $input = self::clean($input);
    }

    $parsed = \mb_parse_str($input, $result);

    // success requires both: no parser failure and a non-empty result
    return $parsed !== false && !empty($result);
  }
3586 36
3587
  /**
   * Checks if the "/u" pattern modifier is available that enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier only matches if the modifier is supported;
    // the silence operator hides the warning of the unsupported case
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return $matched ? true : false;
  }
3597
3598 36
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: a string of decimal digits is a decimal code point,
   * a string of hexadecimal digits (e.g. "a" or "1F") is a hexadecimal code point, and anything else
   * is a character whose code point is looked up via "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range; an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve both boundaries (start first, then end) to integer code points
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3644
3645
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string <p>The decoded string; an empty string for empty input.</p>
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $decoded = (string)$str;

    if (!isset($decoded[0])) {
      return '';
    }

    // non-standard "%uXXXX" escapes are turned into hex entities, so the entity decoder can handle them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $decoded)) {
      $decoded = preg_replace($unicodeEscapePattern, '&#x\\1;', rawurldecode($decoded));
    }

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode again and again until nothing changes anymore (or only once, without "$multi_decode")
    do {
      $before = $decoded;

      $entityDecoded = self::html_entity_decode(self::to_utf8($decoded), $flags);
      $decoded = self::fix_simple_utf8(rawurldecode($entityDecoded));

    } while ($multi_decode === true && $before !== $decoded);

    return (string)$decoded;
  }
3695
3696
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the given string.</p>
   *
   * @deprecated use "UTF8::remove_bom()" instead
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3711 6
3712 6
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the internal BOM table (BOM string => byte length) is checked against
   * the start of the string and cut off if it matches.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // Old PHP versions return false from substr() when nothing is left
        // (e.g. the input was the BOM only), hence the cast.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3729 6
3730 6
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated "$what" is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    $needles = is_string($what) ? array($what) : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (!is_array($needles)) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $str = preg_replace('/(' . preg_quote($needle, '/') . ')+/', $needle, $str);
    }

    return $str;
  }
3753
3754
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
   * The replacement is repeated until no further match is found, so sequences that only
   * appear after a removal are stripped as well.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded (%XX) variants.</p>
   * @param string $replacement [optional] <p>The replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-encoding are case-insensitive ("%0B" equals "%0b"),
      // therefore the patterns need the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3787 13
3788
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string <p>The processed string; an empty string for empty input.</p>
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // "mb_substitute_character()" only accepts a code point (int) or one of the keywords
      // "none" / "long" / "entity" - never the replacement string itself. Invalid bytes are
      // therefore either dropped ("none") or turned into U+FFFD, which is swapped for the
      // real replacement by the str_replace() call below.
      $save = \mb_substitute_character();
      \mb_substitute_character($replacementChar === '' ? 'none' : 0xFFFD);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      // restore the previous global setting
      \mb_substitute_character($save);
    }

    return str_replace(
        array(
            "\xEF\xBF\xBD",
            '�',
        ),
        array(
            $replacementChar,
            $replacementChar,
        ),
        $str
    );
  }
3838
3839
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped; when omitted (or empty),
   *                      Unicode separator (\pZ) and "other" (\pC) characters are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP but it is a valid character list, so it must not
    // fall back to the default whitespace stripping.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3862 2
3863
  /**
   * Builds a regex fragment (without delimiters) that matches any one of the characters of "$s".
   *
   * Characters are collected in one bracket expression; pieces that "UTF8::str_split()" returns with a
   * length other than 1 are added as alternatives of a non-capturing group. Results are cached.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>The regex fragment, using "/" as the assumed delimiter for quoting.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 holds the bracket-expression content, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // a leading hyphen is always literal inside a bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($piece[2])) {
        // at most two bytes long: quote it for the regex
        $parts[0] .= preg_quote($piece, '/');
        continue;
      }

      if (1 === self::strlen($piece)) {
        $parts[0] .= $piece;
        continue;
      }

      $parts[] = $piece;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts)) ? $parts[0] : ('(?:' . implode('|', $parts) . ')');

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3911 1
3912
  /**
   * WARNING: Prints every entry of the internal support list directly to the output,
   * each one followed by a newline and "<br>" (e.g. for debugging).
   */
  public static function showSupport()
  {
    $alreadyChecked = isset(self::$support['already_checked_via_portable_utf8']);

    // fill the support list on first use
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    foreach (self::$support as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
3925
3926
  /**
   * Converts a UTF-8 character to a HTML numbered entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII input unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encodings need to be normalized
    $usedEncoding = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return '&#' . self::ord($char, $usedEncoding) . ';';
  }
3958
3959 1
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>
   *                           Clean non UTF-8 chars from the string
   *                           (only applied when PCRE with UTF-8 support is used).
   *                           </p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: decode the UTF-8 sequences byte by byte; a lead byte whose continuation bytes
      // are missing or malformed is skipped without adding anything to the result

      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {
          // 1 byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // 2 bytes
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i += 1;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // 3 bytes
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // 4 bytes
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }

        $i++;
      }
    }

    // join the single characters into chunks of the requested length
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $chunk) {
        $chunks[] = implode('', $chunk);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4045 1
4046
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (binary strings only), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally an "iconv()" round-trip
   * for every encoding in self::$iconvEncoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is called once and its result reused for both byte orders (1 => LE, 2 => BE)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding into itself
    // leaves the string unchanged (compared via md5).

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4138
4139
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if the haystack or the needle is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4162
4163
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if the haystack or the needle is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    // self::substr() can fail with false; the needle is not empty at this point, so a failed
    // extraction can never be a match and must not be handed to the string comparison
    if ($tail === false) {
      return false;
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
4186
4187
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search terms are turned into quoted, case-insensitive UTF-8 regular expressions; the
   * replacements are inserted literally (no "$1" / "\1" back-reference expansion).
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value (or array of values) being searched for. Every replacement with
   *                       search array is performed on the result of previous replacement.
   *                       An empty search term never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value (or array of values).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // a pattern that can never match: start-of-string preceded by a character
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // "\" and "$" introduce back-references in a preg replacement string; escape them so that the
    // replacement is taken literally, like the native str_ireplace() does
    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $key => $value) {
        $replacements[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacements = addcslashes((string)$replace, '\\$');
    }

    $subject = preg_replace($patterns, $replacements, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
4230
4231
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if the haystack or the needle is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle must be found at the very first position
    return self::stripos($haystack, $needle) === 0;
  }
4254 3
4255 3
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * If the string is longer than $length, it is cut at $length and the (possibly incomplete) last
   * space-separated word is dropped before $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximal number of characters that are taken from the input.</p>
   * @param string $strAddOn <p>The suffix that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the cut position is directly behind a word: drop only the space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $cut = self::substr($str, 0, $length);

    // everything before the last space, i.e. without the last (maybe incomplete) word
    $withoutLastWord = implode(' ', array_slice(explode(' ', $cut), 0, -1));

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // there is no complete word to keep: fall back to a hard cut
    return self::substr($cut, 0, $length - 1) . $strAddOn;
  }
4295
4296
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not an integer, is not positive, is smaller
   * than the length of the input, or if $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with; this also protects the divisions below against a division by zero
    if (!$ps_length) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // for an odd difference the extra character goes to the right side;
        // the division must happen before the cast, otherwise a float length is used
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = self::substr($pre, 0, (int)($diff / 2));
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4350
4351
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4375
4376
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle).
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value for the found search values.
   *                       An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on (the haystack).
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and replace is performed
   *                       with every entry of subject, and the return value is an array as well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4409
4410
  /**
   * Replace only the first occurrence of the "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject with the first match replaced, or the unchanged subject if nothing was found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstMatch = self::strpos($subject, $search);

    if ($firstMatch === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstMatch, self::strlen($search));
  }
4428
4429
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via self::split(), so the characters themselves stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);

    // shuffle() works in place
    shuffle($chars);

    return implode('', $chars);
  }
4444
4445
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4470
4471
  /**
   * Split a string into an array of grapheme clusters, or of chunks of $len grapheme clusters each.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>
   *                    The number of grapheme clusters per chunk. Values smaller than 1 are
   *                    handed to the native str_split(), which rejects them.
   *                    </p>
   *
   * @return array <p>The chunks, an empty array for an empty input string.</p>
   */
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // every $len-th cluster opens a new chunk
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4515 1
4516
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if the haystack or the needle is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle must be found at the very first position
    return self::strpos($haystack, $needle) === 0;
  }
4539
4540 15
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are written as one big binary number (most significant bit first).
   * Leading zeros are removed, so an empty string and a string of NUL bytes both result in "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string that contains only "0" and "1".</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Convert byte by byte: base_convert() works with floats internally and silently loses
    // precision for longer inputs.
    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // same output format as a number conversion: no leading zeros, but at least one digit
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4555
4556 2
  /**
   * Convert a string into an array of words.
   *
   * The string is split at its words and the words themselves are kept in the result, so the
   * returned array alternates between the text between the words and the words.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charlist <p>Additional characters that are treated as part of a word (besides letters).</p>
   *
   * @return array <p>The parts of the string, <code>array('')</code> for an empty input string.</p>
   */
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ($str === '') {
      return array('');
    }

    // letters plus the additional word characters
    $wordChars = self::rxClass($charlist, '\pL');

    $pattern = "/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u";

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4576
4577
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4592
4593
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words (depending on $format).</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are found at the odd indexes, the text between them at the even indexes
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    if ($format !== 1 && $format !== 2) {
      return ($partCount - 1) / 2;
    }

    $words = array();

    if ($format === 1) {
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // $format === 2: the key is the position of the word in the string
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $partCount; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
4636
4637
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are case-folded via
   * UTF8::strtocasefold() and then compared with UTF8::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4655
4656
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4673
4674
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are identical (after a string cast) are equal; everything else is compared
   * byte-wise in Unicode normalization form D (NFD).
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: no normalization is needed for identical strings
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4694
4695
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start position; only applied together with $length via UTF8::substr().</p>
   * @param int    $length   <p>Maximal length of the examined part of the string.</p>
   *
   * @return int|null <p>
   *                  The number of characters before the first character of $charList (the length of the
   *                  examined string if there is none), or <strong>null</strong> if $charList or the
   *                  examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;

    if ($charList === '') {
      return null;
    }

    // cut the string only if a non-default range was requested
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;

    if ($str === '') {
      return null;
    }

    // lazily capture everything before the first character of the char-list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
4727
4728
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4745 10
4746
  /**
4747
   * Create a UTF-8 string from code points.
4748
   *
4749 10
   * INFO: opposite to UTF8::codepoints()
4750
   *
4751 10
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4752
   *
4753
   * @return string <p>UTF-8 encoded string.</p>
4754 3
   */
4755 3
  public static function string(array $array)
  {
    // convert each code point via UTF8::chr() and append the results in input order
    $result = '';
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4768 10
4769 1
  /**
4770
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4771
   *
4772
   * @param string $str <p>The input string.</p>
4773
   *
4774 10
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4775 10
   */
4776 10
  public static function string_has_bom($str)
  {
    // the known BOM byte sequences are the keys of self::$bom (the values are not needed here)
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) !== 0) {
        continue;
      }

      // the string begins with this BOM
      return true;
    }

    return false;
  }
4786
4787
  /**
4788
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
4789
   *
4790
   * @link http://php.net/manual/en/function.strip-tags.php
4791
   *
4792
   * @param string  $str            <p>
4793
   *                                The input string.
4794
   *                                </p>
4795
   * @param string  $allowable_tags [optional] <p>
4796
   *                                You can use the optional second parameter to specify tags which should
4797
   *                                not be stripped.
4798
   *                                </p>
4799
   *                                <p>
4800
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
4801
   *                                can not be changed with allowable_tags.
4802
   *                                </p>
4803
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
4804
   *
4805
   * @return string <p>The stripped string.</p>
4806
   */
4807
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    // optionally clean the input first, then delegate to the native strip_tags()
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4815
4816 10
  /**
4817 10
   * Finds position of first occurrence of a string within another, case insensitive.
4818
   *
4819 10
   * @link http://php.net/manual/en/function.mb-stripos.php
4820 2
   *
4821 2
   * @param string  $haystack  <p>
4822
   *                           The string from which to get the position of the first occurrence
4823 10
   *                           of needle
4824 10
   *                           </p>
4825 2
   * @param string  $needle    <p>
4826
   *                           The string to find in haystack
4827
   *                           </p>
4828 8
   * @param int     $offset    [optional] <p>
4829
   *                           The position in haystack
4830
   *                           to start searching
4831
   *                           </p>
4832
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4833
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4834
   *
4835
   * @return int|false <p>
4836
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
4837
   *                   or false if needle is not found.
4838
   *                   </p>
4839
   */
4840
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The default stays "null" for backward compatibility, but the native functions below expect
    // an integer (passing null to them is deprecated as of PHP 8.1), so normalize it once here.
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4883
4884 11
  /**
4885 11
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4886 11
   *
4887
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
4888 11
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
4889 1
   * @param bool    $before_needle [optional] <p>
4890 1
   *                               If <b>TRUE</b>, this method returns the part of the
4891 1
   *                               haystack before the first occurrence of the needle (excluding the needle).
4892
   *                               </p>
4893 11
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4894
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4895 11
   *
4896
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
4897 11
   */
4898 1
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in / with an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbString = self::$support['mbstring'];

    // preferred implementation: mbstring is the only backend here that receives $encoding
    if ($hasMbString === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // the remaining backends are not given $encoding, so warn about any non-UTF-8 request
    if ($hasMbString === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // last resort: lazily capture everything in front of the first case-insensitive match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);
    if (!isset($match[1])) {
      return false;
    }

    $beforeNeedle = $match[1];

    return $before_needle ? $beforeNeedle : self::substr($haystack, self::strlen($beforeNeedle));
  }
4950 3
4951
  /**
4952 3
   * Get the string length, not the byte-length!
4953
   *
4954
   * @link     http://php.net/manual/en/function.mb-strlen.php
4955
   *
4956
   * @param string  $str       <p>The string being checked for length.</p>
4957
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4958
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4959
   *
4960
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4961
   *             character counted as +1)</p>
4962
   */
4963
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // for these encodings the byte count is returned directly
    // (loose comparison on purpose: same semantics as a "switch" statement)
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // non-UTF-8 input without mbstring: either iconv can take over, or we can only warn
    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      if (self::$support['iconv'] === false) {
        trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$support['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$support['intl'] === true) {
      // the input is cleaned before it is handed to "grapheme_strlen()"
      $str = self::clean($str);
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    if (self::$support['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // fallback via vanilla php: count the characters matched by a UTF-8 aware regex
    preg_match_all('/./us', $str, $parts);
    $length = count($parts[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
5047
5048
  /**
5049
   * Case insensitive string comparisons using a "natural order" algorithm.
5050
   *
5051
   * INFO: natural order version of UTF8::strcasecmp()
5052
   *
5053
   * @param string $str1 <p>The first string.</p>
5054
   * @param string $str2 <p>The second string.</p>
5055
   *
5056
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
5057
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
5058
   *             <strong>0</strong> if they are equal
5059 47
   */
5060
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands first, then compare them in natural order
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5064 47
5065 9
  /**
5066
   * String comparisons using a "natural order" algorithm
5067
   *
5068 45
   * INFO: natural order version of UTF8::strcmp()
5069
   *
5070
   * @link  http://php.net/manual/en/function.strnatcmp.php
5071
   *
5072 1
   * @param string $str1 <p>The first string.</p>
5073 1
   * @param string $str2 <p>The second string.</p>
5074
   *
5075 45
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
5076 45
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
5077 37
   *             <strong>0</strong> if they are equal
5078 37
   */
5079
  public static function strnatcmp($str1, $str2)
  {
    // shortcut: identical string representations are equal, no folding required
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // delegate to the native natural-order comparison on the folded strings
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5083
5084 43
  /**
5085 20
   * Case-insensitive string comparison of the first n characters.
5086 20
   *
5087 41
   * @link  http://php.net/manual/en/function.strncasecmp.php
5088
   *
5089
   * @param string $str1 <p>The first string.</p>
5090 43
   * @param string $str2 <p>The second string.</p>
5091
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5092
   *
5093
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5094
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5095
   *             <strong>0</strong> if they are equal
5096 43
   */
5097 2
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both operands first, then compare their first $len characters
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5101 1
5102
  /**
5103
   * String comparison of the first n characters.
5104 43
   *
5105 43
   * @link  http://php.net/manual/en/function.strncmp.php
5106
   *
5107
   * @param string $str1 <p>The first string.</p>
5108
   * @param string $str2 <p>The second string.</p>
5109
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5110
   *
5111
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5112
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5113
   *             <strong>0</strong> if they are equal
5114
   */
5115
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() may return false instead of a string; cast the results so that
    // UTF8::strcmp() always receives real strings
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5122
5123
  /**
5124
   * Search a string for any of a set of characters.
5125
   *
5126
   * @link  http://php.net/manual/en/function.strpbrk.php
5127
   *
5128
   * @param string $haystack  <p>The string where char_list is looked for.</p>
5129
   * @param string $char_list <p>This parameter is case sensitive.</p>
5130
   *
5131
   * @return string|false String starting from the character found, or false if it is not found.
5132
   */
5133
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // nothing to search in / nothing to search for
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // find the first character of the haystack that is part of the char list
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
      return false;
    }

    // byte position of that character; everything from there to the end is returned
    $bytePosition = strpos($haystack, $m[0]);

    return substr($haystack, $bytePosition);
  }
5148
5149
  /**
5150
   * Find position of first occurrence of string in a string.
5151
   *
5152
   * @link http://php.net/manual/en/function.mb-strpos.php
5153
   *
5154
   * @param string  $haystack  <p>The string being checked.</p>
5155
   * @param string  $needle    <p>The string to find in haystack.</p>
5156
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
5157
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5158
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5159
   *
5160
   * @return int|false <p>
5161 1
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
5162
   *                   If needle is not found it returns false.
5163 1
   *                   </p>
5164 1
   */
5165
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: a non-negative integer is converted into
    // its character via UTF8::chr(). This has to happen *before* the string cast below, otherwise
    // the strict integer check can never match (same order as in UTF8::strripos()).
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn only if neither iconv nor mbstring is available for a non-UTF-8 encoding
    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['iconv'] === false
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the haystack: resolve it to an absolute start
      // position, so that the returned position stays relative to the beginning of the haystack
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    $haystack = (string)self::substr($haystack, $offset);

    // byte position of the needle inside the remaining haystack
    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the skipped characters
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5270
5271
  /**
5272
   * Finds the last occurrence of a character in a string within another.
5273
   *
5274
   * @link http://php.net/manual/en/function.mb-strrchr.php
5275
   *
5276
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5277
   * @param string $needle        <p>The string to find in haystack</p>
5278
   * @param bool   $before_needle [optional] <p>
5279
   *                              Determines which portion of haystack
5280 1
   *                              this function returns.
5281
   *                              If set to true, it returns all of haystack
5282 1
   *                              from the beginning to the last occurrence of needle.
5283
   *                              If set to false, it returns all of haystack
5284 1
   *                              from the last occurrence of needle to the end,
5285 1
   *                              </p>
5286
   * @param string $encoding      [optional] <p>
5287
   *                              Character encoding name to use.
5288 1
   *                              If it is omitted, internal character encoding is used.
5289 1
   *                              </p>
5290 1
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5291
   *
5292 1
   * @return string|false The portion of haystack or false if needle is not found.
5293
   */
5294 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encodings are normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $result = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5310 1
5311
  /**
5312 1
   * Reverses characters order in the string.
5313
   *
5314
   * @param string $str The input string
5315
   *
5316
   * @return string The string with characters in the reverse sequence
5317
   */
5318
  public static function strrev($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // split into characters (not bytes), flip their order and glue them back together
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode('', $reversed);
  }
5328 7
5329
  /**
5330
   * Finds the last occurrence of a character in a string within another, case insensitive.
5331
   *
5332
   * @link http://php.net/manual/en/function.mb-strrichr.php
5333
   *
5334
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5335
   * @param string  $needle        <p>The string to find in haystack.</p>
5336
   * @param bool    $before_needle [optional] <p>
5337
   *                               Determines which portion of haystack
5338
   *                               this function returns.
5339
   *                               If set to true, it returns all of haystack
5340 1
   *                               from the beginning to the last occurrence of needle.
5341
   *                               If set to false, it returns all of haystack
5342 1
   *                               from the last occurrence of needle to the end,
5343
   *                               </p>
5344
   * @param string  $encoding      [optional] <p>
5345
   *                               Character encoding name to use.
5346
   *                               If it is omitted, internal character encoding is used.
5347
   *                               </p>
5348
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5349
   *
5350
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
5351
   */
5352 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $isDefaultEncoding = ($encoding === 'UTF-8');
    if (!$isDefaultEncoding) {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the case-insensitive search itself is done by "mb_strrichr()"
    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5367
5368 1
  /**
5369
   * Find position of last occurrence of a case-insensitive string.
5370 1
   *
5371
   * @param string  $haystack  <p>The string to look in.</p>
5372
   * @param string  $needle    <p>The string to look for.</p>
5373
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5374
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5375
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5376
   *
5377
   * @return int|false <p>
5378
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
5379
   *                   not found, it returns false.
5380
   *                   </p>
5381
   */
5382
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into a character via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or an empty needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strripos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    //
    // Both operands are lower-cased so that the case-sensitive self::strrpos() behaves
    // case-insensitively. The previous self::strtonatfold() only removed combining marks:
    // the search stayed case-sensitive, ignored accents, and the removed marks could
    // shift the reported position.
    // NOTE(review): self::strtolower() relies on "\mb_strtolower" (presumably via polyfill here) — confirm.
    return self::strrpos(
        self::strtolower($haystack, $encoding),
        self::strtolower($needle, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
5445 2
5446 1
  /**
5447
   * Find position of last occurrence of a string in a string.
5448 1
   *
5449 1
   * @link http://php.net/manual/en/function.mb-strrpos.php
5450 1
   *
5451
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5452 1
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
5453
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5454
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5455
   *                              the end of the string.
5456
   *                              </p>
5457
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5458
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5459
   *
5460
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5461
   *                   is not found, it returns false.</p>
5462
   */
5463
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is a code point: turn it into its character
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // normalize the argument types
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or needle can never match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // booleans (old signature) and 'UTF-8' both mean UTF-8, everything else is normalized
    if ($encoding !== 'UTF-8' && $encoding !== true && $encoding !== false) {
      $encoding = self::normalize_encoding($encoding);
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring (a miss falls through to the next strategy)
    if (self::$support['mbstring'] === true) {
      $position = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    // 2nd choice: intl
    if (self::$support['intl'] === true) {
      $position = \grapheme_strrpos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // last resort: byte-wise search on a trimmed haystack
    if ($offset > 0) {
      // skip the first $offset characters, they are added back to the result below
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut the tail off, the result then needs no correction
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    $bytePosition = strrpos($haystack, $needle);

    // translate the byte position into a character position
    return $bytePosition === false
        ? false
        : $offset + self::strlen(substr($haystack, 0, $bytePosition));
  }
5540 1
5541
  /**
5542 1
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5543
   * mask.
5544
   *
5545
   * @param string $str    <p>The input string.</p>
5546
   * @param string $mask   <p>The mask of chars</p>
5547
   * @param int    $offset [optional]
5548
   * @param int    $length [optional]
5549
   *
5550
   * @return int
5551
   */
5552
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // coerce the numeric arguments
    $offset = (int)$offset;
    $length = (int)$length;

    // only cut the string when a non-default offset or length was requested
    if ($offset || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the leading run of characters described by the mask
    $matches = array();
    if (preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
5569
5570 20
  /**
5571 2
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5572
   *
5573
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5574 2
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5575 2
   * @param bool    $before_needle [optional] <p>
5576
   *                               If <b>TRUE</b>, strstr() returns the part of the
5577 2
   *                               haystack before the first occurrence of the needle (excluding the needle).
5578
   *                               </p>
5579
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5580 20
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5581
   *
5582 20
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5583 4
   */
5584
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to search in, or nothing to search for
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // clean both inputs before any of the search back-ends sees them
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring (a miss falls through to the next strategy)
    if (self::$support['mbstring'] === true) {
      $result = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($result !== false) {
        return $result;
      }
    }

    // 2nd choice: intl
    if (self::$support['intl'] === true) {
      $result = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($result !== false) {
        return $result;
      }
    }

    // last resort: lazily capture everything in front of the first occurrence of the needle
    $matches = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $matches);

    if (!isset($matches[1])) {
      return false;
    }

    $head = $matches[1];

    return $before_needle ? $head : self::substr($haystack, self::strlen($head));
  }
5642 2
5643 3
  /**
5644 3
   * Unicode transformation for case-less matching.
5645 3
   *
5646
   * @link http://unicode.org/reports/tr21/tr21-5.html
5647
   *
5648 4
   * @param string  $str       <p>The input string.</p>
5649 16
   * @param bool    $full      [optional] <p>
5650
   *                           <b>true</b>, replace full case folding chars (default)<br />
5651 19
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5652
   *                           </p>
5653
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5654 19
   *
5655 19
   * @return string
5656
   */
5657 3
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // per-request caches for the replacement tables
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    // step 1: the small static table [UTF8::$commonCaseFold]
    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }
    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // step 2 (optional): the full case folding data, loaded on first use
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // step 3: finish with a plain lowercase conversion
    return self::strtolower($str);
  }
5694
5695
  /**
5696 22
   * Make a string lowercase.
5697 6
   *
5698
   * @link http://php.net/manual/en/function.mb-strtolower.php
5699
   *
5700 16
   * @param string  $str       <p>The string being lowercased.</p>
5701
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5702
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5703
   *
5704
   * @return string str with all alphabetic characters converted to lowercase.
5705
   */
5706 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // an empty input needs no conversion
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean non UTF-8 chars before "\mb_strtolower" sees the string
      $str = self::clean($str);
    }

    // the default 'UTF-8' is used as-is, any other name is normalized first
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtolower($str, $charset);
  }
5727
5728 1
  /**
5729
   * Generic case sensitive transformation for collation matching.
5730 1
   *
5731
   * @param string $str <p>The input string</p>
5732
   *
5733
   * @return string
5734
   */
5735
  private static function strtonatfold($str)
  {
    // decompose first (NFD) ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... then drop every run of non-spacing marks (\p{Mn})
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5740
5741
  /**
5742
   * Make a string uppercase.
5743
   *
5744 8
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5745
   *
5746 8
   * @param string  $str       <p>The string being uppercased.</p>
5747 2
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5748
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5749
   *
5750 7
   * @return string str with all alphabetic characters converted to uppercase.
5751 7
   */
5752 7 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // an empty input needs no conversion
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean non UTF-8 chars before "\mb_strtoupper" sees the string
      $str = self::clean($str);
    }

    // the default 'UTF-8' is used as-is, any other name is normalized first
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtoupper($str, $charset);
  }
5772 1
5773 1
  /**
5774 7
   * Translate characters or replace sub-strings.
5775 7
   *
5776 7
   * @link  http://php.net/manual/en/function.strtr.php
5777
   *
5778 7
   * @param string          $str  <p>The string being translated.</p>
5779 7
   * @param string|string[] $from <p>The string replacing from.</p>
5780
   * @param string|string[] $to   <p>The string being translated to.</p>
5781 7
   *
5782
   * @return string <p>
5783
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5784
   *                corresponding character in to.
5785
   *                </p>
5786
   */
5787
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the "no third argument" marker: in that case $from is handed to
    // the native strtr() unchanged (expected to be a replace-pairs array).
    if (INF !== $to) {
      // Strings are split into single characters; arrays are used as given.
      // Previously arrays were also passed to self::str_split(), although the
      // documented parameter type is "string|string[]".
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // both lists must have the same size for array_combine():
      // surplus entries of the longer list are ignored
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // build the "search => replacement" map for the native strtr()
      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5806 1
5807
  /**
5808
   * Return the width of a string.
5809 1
   *
5810
   * @param string  $str       <p>The input string.</p>
5811 1
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5812
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5813 1
   *
5814 1
   * @return int
5815 1
   */
5816 1
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // the default 'UTF-8' is used as-is, any other name is normalized first
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // "\mb_strwidth" may be provided via polyfill
    return \mb_strwidth($str, $charset);
  }
5831
5832
  /**
5833
   * Get part of a string.
5834
   *
5835
   * @link http://php.net/manual/en/function.mb-substr.php
5836
   *
5837
   * @param string  $str       <p>The string being checked.</p>
5838
   * @param int     $start     <p>The first position used in str.</p>
5839
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
5840
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5841
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5842
   *
5843
   * @return string|false <p>Returns a sub-string specified by the start and length parameters,<br />or false if start lies beyond the end of the string.</p>
5844
   */
5845
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // an empty input always yields an empty result
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the bounds check and for the default length
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    // a start position behind the end of the string is reported as failure
    if ($start && $start > $charCount) {
      return false;
    }

    $length = $length === null ? $charCount : (int)$length;

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($encoding !== 'UTF-8' && $encoding !== true && $encoding !== false) {
      $encoding = self::normalize_encoding($encoding);
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // 2nd choice: iconv ("iconv_substr()" can't handle negative length)
    if ($length >= 0 && self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    // 3rd choice: intl
    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php:
    // split into an array of characters (invalid characters are removed) ...
    $chars = self::split($str);

    // ... extract the relevant part, and join it to make a string again
    return implode('', array_slice($chars, $start, $length));
  }
5921
5922
  /**
5923
   * Binary safe comparison of two strings from an offset, up to length characters.
5924
   *
5925
   * @param string  $main_str           <p>The main string being compared.</p>
5926
   * @param string  $str                <p>The secondary string being compared.</p>
5927
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5928
   *                                    the end of the string.</p>
5929
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5930
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5931
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5932
   *                                    insensitive.</p>
5933
   *
5934
   * @return int
5935
   */
5936
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
5937
  {
5938
    $main_str = self::substr($main_str, $offset, $length);
5939
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5938 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5940
5941
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5938 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5939 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5938 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5939 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5942
  }
5943
5944
  /**
5945
   * Count the number of substring occurrences.
5946
   *
5947
   * @link  http://php.net/manual/en/function.substr-count.php
5948
   *
5949
   * @param string  $haystack  <p>The string to search in.</p>
5950
   * @param string  $needle    <p>The substring to search for.</p>
5951
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
5952
   * @param int     $length    [optional] <p>
5953
   *                           The maximum length after the specified offset to search for the
5954
   *                           substring. It outputs a warning if the offset plus the length is
5955
   *                           greater than the haystack length.
5956
   *                           </p>
5957
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
5958
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5959
   *
5960
   * @return int|false <p>This function returns an integer, or false if the haystack or the needle is empty.</p>
5961
   */
5962
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be counted if either string is empty
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // Keep "no length given" as null: casting it to 0 would cut the
      // haystack down to zero characters whenever only an offset is passed.
      if ($length !== null) {
        $length = (int)$length;
      }

      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // "self::substr()" can return false; treat that as an empty haystack.
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences would distort the result, so strip them from both strings first
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the literal matches via PCRE in UTF-8 mode
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6018
6019
  /**
6020
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
6021
   *
6022
   * @param string $haystack <p>The string to search in.</p>
6023
   * @param string $needle   <p>The substring to search for.</p>
6024
   *
6025
   * @return string <p>Return the sub-string.</p>
6026
   */
6027 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty input stays empty
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the input untouched
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // drop as many leading characters as the prefix is long
    return self::substr($haystack, self::strlen($needle));
  }
6047
6048
  /**
6049
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
6050
   *
6051
   * @param string $haystack <p>The string to search in.</p>
6052
   * @param string $needle   <p>The substring to search for.</p>
6053
   *
6054
   * @return string <p>Return the sub-string.</p>
6055
   */
6056 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty input stays empty
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the input untouched
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6076 6
6077 3
  /**
6078
   * Removes a prefix ($needle) from the start of the string ($haystack).
6079
   *
6080
   * @param string $haystack <p>The string to search in.</p>
6081 6
   * @param string $needle   <p>The substring to search for.</p>
6082
   *
6083 6
   * @return string <p>Return the sub-string.</p>
6084 1
   */
6085 1 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty input stays empty
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the input untouched
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // drop as many leading characters as the prefix is long
    return self::substr($haystack, self::strlen($needle));
  }
6105
6106 5
  /**
6107 5
   * Replace text within a portion of a string.
6108
   *
6109 5
   * source: https://gist.github.com/stemar/8287074
6110 1
   *
6111 1
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
6112 1
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
6113
   * @param int|int[]       $start            <p>
6114 5
   *                                          If start is positive, the replacing will begin at the start'th offset
6115
   *                                          into string.
6116
   *                                          <br /><br />
6117
   *                                          If start is negative, the replacing will begin at the start'th character
6118
   *                                          from the end of string.
6119
   *                                          </p>
6120
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
6121
   *                                          portion of string which is to be replaced. If it is negative, it
6122
   *                                          represents the number of characters from the end of string at which to
6123
   *                                          stop replacing. If it is not given, then it will default to strlen(
6124
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6125
   *                                          length is zero then this function will have the effect of inserting
6126
   *                                          replacement into string at the given start offset.</p>
6127
   *
6128
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6129
   */
6130
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      // Array mode: bring every argument to a per-element list and
      // delegate each tuple to the string mode below.
      $num = count($str);

      // $replacement: one entry per input string
      $replacement = is_array($replacement)
          ? array_slice($replacement, 0, $num)
          : array_pad(array($replacement), $num, $replacement);

      // $start: non-integer offsets fall back to 0
      if (is_array($start)) {
        $start = array_map(
            function ($offsetTmp) {
              return is_int($offsetTmp) ? $offsetTmp : 0;
            },
            array_slice($start, 0, $num)
        );
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: a missing value means 0, a non-integer value falls back to $num
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_map(
            function ($lengthTmp) use ($num) {
              if ($lengthTmp === null) {
                return 0;
              }

              return is_int($lengthTmp) ? $lengthTmp : $num;
            },
            array_slice($length, 0, $num)
        );
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // String mode: only the first entry of a replacement array is used.
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // split both strings into lists of characters
    preg_match_all('/./us', $str, $strMatches);
    preg_match_all('/./us', $replacement, $replacementMatches);

    // without a length, replace up to the end of the string
    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    $chars = $strMatches[0];
    array_splice($chars, $start, $length, $replacementMatches[0]);

    return implode('', $chars);
  }
6203 10
6204 10
  /**
6205
   * Removes a suffix ($needle) from the end of the string ($haystack).
6206 10
   *
6207 3
   * @param string $haystack <p>The string to search in.</p>
6208
   * @param string $needle   <p>The substring to search for.</p>
6209
   *
6210 8
   * @return string <p>Return the sub-string.</p>
6211 8
   */
6212 8 View Code Duplication
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty input stays empty
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the input untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6231
6232 8
  /**
6233 8
   * Returns a case swapped version of the string.
6234 8
   *
6235 8
   * @param string  $str       <p>The input string.</p>
6236
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6237 8
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
6238 6
   *
6239 6
   * @return string <p>Each character's case swapped.</p>
6240 6
   */
6241 6
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the characters are inspected
      $str = self::clean($str);
    }

    // Visit every non-whitespace character: if upper-casing does not change it,
    // return its lower-case form, otherwise return the upper-case form.
    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6275
6276
  /**
6277
   * alias for "UTF8::to_ascii()"
6278
   *
6279
   * @see UTF8::to_ascii()
6280
   *
6281
   * @param string $s
6282
   * @param string $subst_chr
6283
   * @param bool   $strict
6284
   *
6285
   * @return string
6286
   *
6287
   * @deprecated
6288
   */
6289
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    // deprecated camelCase alias: forwards all arguments unchanged
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6293
6294
  /**
6295
   * alias for "UTF8::to_iso8859()"
6296
   *
6297
   * @see UTF8::to_iso8859()
6298
   *
6299
   * @param string $str
6300
   *
6301
   * @return string|string[]
6302
   *
6303
   * @deprecated
6304
   */
6305
  public static function toIso8859($str)
  {
    // deprecated camelCase alias: forwards the argument unchanged
    $converted = self::to_iso8859($str);

    return $converted;
  }
6309
6310
  /**
6311
   * alias for "UTF8::to_latin1()"
6312
   *
6313
   * @see UTF8::to_latin1()
6314
   *
6315
   * @param $str
6316
   *
6317
   * @return string
6318
   *
6319
   * @deprecated
6320
   */
6321
  public static function toLatin1($str)
  {
    // deprecated camelCase alias: forwards the argument unchanged
    $converted = self::to_latin1($str);

    return $converted;
  }
6325
6326
  /**
6327
   * alias for "UTF8::to_utf8()"
6328
   *
6329
   * @see UTF8::to_utf8()
6330
   *
6331
   * @param string $str
6332
   *
6333
   * @return string
6334
   *
6335
   * @deprecated
6336
   */
6337
  public static function toUTF8($str)
  {
    // deprecated camelCase alias: forwards the argument unchanged
    $converted = self::to_utf8($str);

    return $converted;
  }
6341
6342
  /**
6343
   * Convert a string into ASCII.
6344
   *
6345
   * @param string $str     <p>The input string.</p>
6346
   * @param string $unknown [optional] <p>Character to use if a character is unknown. (default is ?)</p>
6347
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6348
   *                        performance</p>
6349
   *
6350
   * @return string
6351
   *
6352
   * @throws \Exception
6353
   */
6354
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the already requested transliteration banks, kept across calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    // split the string into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset the code point on every pass, otherwise a value decoded for a
      // previous character would be reused when no branch below matches.
      $ord = null;

      // decode the code point from the lead byte and its continuation bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // no valid lead byte (128-191, 254, 255): nothing to look up
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the data file ("bank"), the low byte the entry in it
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the included file is presumably expected to fill
          // $UTF8_TO_ASCII[$bank] - confirm against the data files.
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return implode('', $chars);
  }
6470
6471
  /**
6472
   * Convert a string into "ISO-8859"-encoding (Latin-1).
6473
   *
6474
   * @param string|string[] $str
6475
   *
6476
   * @return string|string[]
6477
   */
6478
  public static function to_iso8859($str)
  {
    // arrays are converted entry by entry (recursively), the keys are kept
    if (is_array($str)) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
6500
6501
  /**
6502
   * alias for "UTF8::to_iso8859()"
6503
   *
6504
   * @see UTF8::to_iso8859()
6505
   *
6506
   * @param string|string[] $str
6507
   *
6508
   * @return string|string[]
6509
   */
6510
  public static function to_latin1($str)
  {
    // alias: forwards the argument unchanged to "to_iso8859()"
    $converted = self::to_iso8859($str);

    return $converted;
  }
6514
6515
  /**
6516
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
6517
   *
6518
   * <ul>
6519
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
6520
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
6521
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this case.</li>
6522
   * </ul>
6523
   *
6524
   * @param string|string[] $str                    <p>Any string or array.</p>
6525
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
6526
   *
6527
   * @return string|string[] <p>The UTF-8 encoded string.</p>
6528
   */
6529
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted entry by entry (recursively)
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // Re-encodes a single byte as a two-byte UTF-8 sequence.
    // The integer shift replaces "ord() / 64", which handed a fractional float to chr().
    $toTwoByteUtf8 = function ($byte) {
      return (chr(ord($byte) >> 6) | "\xc0") . (($byte & "\x3f") | "\x80");
    };

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes announced by the lead byte (0 = doesn't look like UTF8)
        if ($c1 <= "\xdf") {
          $tailCount = 1; // looks like 2 bytes UTF8
        } elseif ($c1 <= "\xef") {
          $tailCount = 2; // looks like 3 bytes UTF8
        } elseif ($c1 <= "\xf7") {
          $tailCount = 3; // looks like 4 bytes UTF8
        } else {
          $tailCount = 0;
        }

        // every announced continuation byte must exist and be in the range 0x80-0xbf
        $sequence = $c1;
        $isUtf8 = $tailCount > 0;
        for ($k = 1; $isUtf8 && $k <= $tailCount; $k++) {
          $tail = $i + $k >= $max ? "\x00" : $str[$i + $k];

          if ($tail >= "\x80" && $tail <= "\xbf") {
            $sequence .= $tail;
          } else {
            $isUtf8 = false;
          }
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $tailCount;
        } else { // not valid UTF8 - convert the lead byte only
          $buf .= $toTwoByteUtf8($c1);
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= $toTwoByteUtf8($c1);
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6631
6632
  /**
6633
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
6634
   *
6635
   * INFO: This is slower than "trim()"
6636
   *
6637
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
6638
   * but the check for ASCII (7-Bit) costs more time than we can save here.
6639
   *
6640
   * @param string $str   <p>The string to be trimmed</p>
6641
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
6642
   *
6643
   * @return string <p>The trimmed string.</p>
6644
   */
6645
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    // Nothing to trim in an empty string.
    if (!isset($str[0])) {
      return '';
    }

    // The default char-list is used when no list was passed (INF sentinel) or an
    // empty/falsy one was given. "0" (as string or int) is falsy in PHP but it is
    // a legitimate character to strip, so it must NOT select the default branch.
    $useDefaultCharList = ($chars === INF) || (!$chars && $chars !== '0' && $chars !== 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultCharList) {
      // Strip Unicode separators (\pZ) and "other"/control characters (\pC) from both ends.
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    // Custom char-list: strip the left side first, then the right side.
    // NOTE(review): ltrim()/rtrim() are defined outside this view - confirm they
    // also accept "0" as a char-list.
    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6660
6661
  /**
6662
   * Makes string's first char uppercase.
6663
   *
6664
   * @param string  $str       <p>The input string.</p>
6665
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
6666
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
6667
   *
6668
   * @return string <p>The resulting string</p>
6669
   */
6670
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Split into the first character and the remainder.
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // self::substr() can return false instead of a string (reported by static
    // analysis); normalise both parts so strtoupper() and the concatenation
    // below only ever see strings.
    if ($firstChar === false) {
      $firstChar = '';
    }

    if ($rest === false) {
      $rest = '';
    }

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
6674
6675
  /**
6676
   * alias for "UTF8::ucfirst()"
6677
   *
6678
   * @see UTF8::ucfirst()
6679
   *
6680
   * @param string  $word
6681
   * @param string  $encoding
6682
   * @param boolean $cleanUtf8
6683
   *
6684
   * @return string
6685
   */
6686
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Pure alias: every argument is forwarded unchanged to ucfirst().
    $upperCased = self::ucfirst($word, $encoding, $cleanUtf8);

    return $upperCased;
  }
6690
6691
  /**
6692
   * Uppercase for all words in the string.
6693
   *
6694
   * @param string   $str        <p>The input string.</p>
6695
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
6696
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
6697
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
6698
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
6699
   *
6700
   * @return string
6701
   */
6702
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Only a really empty string short-circuits; "0" is falsy in PHP but is
    // valid input and must be processed like any other string.
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newwords = array();

    // The exception lookup is only needed when at least one exception was given.
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // Skip empty chunks only. A chunk equal to "0" has to be kept, otherwise
      // it would silently disappear from the re-assembled string.
      if ((string)$word === '') {
        continue;
      }

      // Upper-case the first char unless the word is listed as an exception
      // (strict comparison, so the match is case- and type-sensitive).
      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    // The chunks are glued back together without a separator.
    // NOTE(review): this presumably relies on str_to_words() also returning the
    // separators between the words - confirm against its implementation.
    return implode('', $newwords);
  }
6740
6741
  /**
6742
   * Multi decode html entity & fix urlencoded-win1252-chars.
6743
   *
6744
   * e.g:
6745
   * 'test+test'                     => 'test test'
6746
   * 'D&#252;sseldorf'               => 'Düsseldorf'
6747
   * 'D%FCsseldorf'                  => 'Düsseldorf'
6748
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
6749
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
6750
   * 'Düsseldorf'                   => 'Düsseldorf'
6751
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
6752
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
6753
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
6754
   *
6755
   * @param string $str          <p>The input string.</p>
6756
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
6757
   *
6758
   * @return string
6759
   */
6760 View Code Duplication
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Rewrite non-standard "%uXXXX" escapes into hexadecimal HTML entities
    // ("&#xXXXX;") so the entity decoding below can resolve them.
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only requested when the running PHP version is at least 5.4.
    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    do {
      // Remember the input of this round to detect when nothing changes anymore.
      $str_compare = $str;

      // to_utf8() can also return an array (reported by static analysis), while
      // html_entity_decode() expects a string - force a string here, the same
      // way utf8_decode() does.
      $utf8 = (string)self::to_utf8($str);

      $str = self::fix_simple_utf8(
          urldecode(
              self::html_entity_decode($utf8, $flags)
          )
      );

      // Repeat while decoding still changes the string (multi-encoded input),
      // but only if the caller asked for multi-decoding.
    } while ($multi_decode === true && $str_compare !== $str);

    return (string)$str;
  }
6791
6792
  /**
6793
   * Returns an array with "urlencoded"-win1252 -> UTF-8
6794
   *
6795
   * @deprecated use the "UTF8::urldecode()" function to decode a string
6796
   *
6797
   * @return array
6798
   */
6799
  public static function urldecode_fix_win1252_chars()
  {
    // Lookup table: percent-encoded byte (%20 - %FF) => replacement string.
    // Declared "static" so the array is only built once per request.
    //
    // NOTE(review): the entries for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD are
    // empty strings in this table - confirm that this is intended.
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (it was wrongly mapped to a backtick).
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
7030
7031
  /**
7032
   * Decodes an UTF-8 string to ISO-8859-1.
7033
   *
7034
   * @param string $str <p>The input string.</p>
7035
   *
7036
   * @return string
7037
   */
7038
  public static function utf8_decode($str)
  {
    $input = (string)$str;

    // Empty input: nothing to decode.
    if (!isset($input[0])) {
      return '';
    }

    // The search/replace lists are derived from the mapping table on the first
    // call and are then reused by every later call.
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    // Normalise the input to UTF-8, apply the mapping table, then hand the
    // result to the polyfill decoder.
    $utf8 = (string)self::to_utf8($input);
    $mapped = str_replace($search, $replace, $utf8);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
7060
7061
  /**
7062
   * Encodes an ISO-8859-1 string to UTF-8.
7063
   *
7064
   * @param string $str <p>The input string.</p>
7065
   *
7066
   * @return string
7067
   */
7068
  public static function utf8_encode($str)
  {
    $input = (string)$str;

    // Empty input: nothing to encode.
    if (!isset($input[0])) {
      return '';
    }

    $encoded = \utf8_encode($input);

    // The mapping step below is only applied when the encoded string contains
    // the byte 0xC2; otherwise the native result is returned as it is.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // Build the search/replace lists once and reuse them on later calls.
    static $from = null;
    static $to = null;

    if ($from === null) {
      $from = array_keys(self::$cp1252ToUtf8);
      $to = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($from, $to, $encoded);
  }
7094
7095
  /**
7096
   * fix -> utf8-win1252 chars
7097
   *
7098
   * @param string $str <p>The input string.</p>
7099
   *
7100
   * @return string
7101
   *
7102
   * @deprecated use "UTF8::fix_simple_utf8()"
7103
   */
7104
  public static function utf8_fix_win1252_chars($str)
  {
    // Deprecated alias: the work is done by fix_simple_utf8().
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7108
7109
  /**
7110
   * Returns an array with all utf8 whitespace characters.
7111
   *
7112
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
7113
   *
7114
   * @author: Derek E. [email protected]
7115
   *
7116
   * @return array <p>
7117
   *               An array with all known whitespace characters as values and the type of whitespace as keys
7118
   *               as defined in above URL.
7119
   *               </p>
7120
   */
7121
  public static function whitespace_table()
  {
    // Plain accessor for the class-level whitespace table.
    $table = self::$whitespaceTable;

    return $table;
  }
7125
7126
  /**
7127
   * Limit the number of words in a string.
7128
   *
7129
   * @param string $str      <p>The input string.</p>
7130
   * @param int    $words    <p>The limit of words as integer.</p>
7131
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
7132
   *
7133
   * @return string
7134
   */
7135
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    // Empty input yields an empty result.
    if (!isset($text[0])) {
      return '';
    }

    // A limit below one word yields an empty result as well.
    $limit = (int)$words;
    if ($limit < 1) {
      return '';
    }

    // Match optional leading whitespace followed by at most $limit
    // "non-whitespace run + trailing whitespace" groups.
    $regex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($regex, $text, $found);

    // The text counts as shortened only if something matched and the match
    // does not already cover the whole string.
    $wasShortened = isset($found[0]) && self::strlen($text) !== self::strlen($found[0]);

    if ($wasShortened === false) {
      return $text;
    }

    // Drop the whitespace after the last kept word before appending the add-on.
    return self::rtrim($found[0]) . $strAddOn;
  }
7161
7162
  /**
7163
   * Wraps a string to a given number of characters
7164
   *
7165
   * @link  http://php.net/manual/en/function.wordwrap.php
7166
   *
7167
   * @param string $str   <p>The input string.</p>
7168
   * @param int    $width [optional] <p>The column width.</p>
7169
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
7170
   * @param bool   $cut   [optional] <p>
7171
   *                      If the cut is set to true, the string is
7172
   *                      always wrapped at or before the specified width. So if you have
7173
   *                      a word that is larger than the given width, it is broken apart.
7174
   *                      </p>
7175
   *
7176
   * @return string <p>The given string wrapped at the specified column.</p>
7177
   */
7178
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    // Both the text and the break string must be non-empty.
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // Build two parallel structures:
    //  - $units: the real characters, plus one entry per existing break
    //  - $mask:  one single-byte placeholder per unit ('#' = existing break,
    //            ' ' = space, '?' = any other character), so that the native
    //            byte-based wordwrap() can do the measuring.
    $mask = '';
    $units = array();

    foreach (explode($break, $str) as $index => $segment) {

      // Every segment after the first one was preceded by a break.
      if ($index) {
        $units[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $units[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // Let the native function decide where the breaks go; it marks them with '#'.
    $wrapped = wordwrap($mask, $width, '#', $cut);

    $result = '';
    $unitIndex = 0; // next entry of $units that has not been emitted yet
    $cursor = 0;    // current position inside $wrapped
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($wrapped, '#', $breakPos + 1)) {

      // Emit all units located in front of this break marker.
      while ($cursor < $breakPos) {
        $result .= $units[$unitIndex];
        unset($units[$unitIndex]);
        ++$unitIndex;
        ++$cursor;
      }

      // Continue behind the marker on the next round.
      $cursor = $breakPos + 1;

      // If the marker stands for an existing break or a space, that unit is
      // consumed (it gets replaced by the break string below).
      if ($break === $units[$unitIndex] || ' ' === $units[$unitIndex]) {
        unset($units[$unitIndex]);
        ++$unitIndex;
      }

      $result .= $break;
    }

    // Whatever is left belongs to the last line.
    return $result . implode('', $units);
  }
7229
7230
  /**
7231
   * Returns an array of Unicode White Space characters.
7232
   *
7233
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
7234
   */
7235
  public static function ws()
  {
    // Plain accessor for the class-level list of Unicode white space characters.
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7239
7240
}
7241