Completed
Push — master ( e0f452...3084a7 )
by Lars
04:03
created

UTF8::is_json()   B

Complexity

Conditions 5
Paths 3

Size

Total Lines 24
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 5

Importance

Changes 0
Metric Value
dl 0
loc 24
ccs 7
cts 7
cp 1
rs 8.5125
c 0
b 0
f 0
cc 5
eloc 14
nc 3
nop 1
crap 5
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
      '’'      => '’',
323
  );
324
325
  /**
326
   * @var array
327
   */
328
  private static $utf8ToWin1252 = array(
329
      "\xe2\x82\xac" => "\x80", // EURO SIGN
330
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
331
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
332
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
333
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
334
      "\xe2\x80\xa0" => "\x86", // DAGGER
335
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
336
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
337
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
338
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
339
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
340
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
341
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
342
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
343
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
344
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
345
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
346
      "\xe2\x80\xa2" => "\x95", // BULLET
347
      "\xe2\x80\x93" => "\x96", // EN DASH
348
      "\xe2\x80\x94" => "\x97", // EM DASH
349
      "\xcb\x9c"     => "\x98", // SMALL TILDE
350
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
351
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
352
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
353
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
354
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
355
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
356
  );
357
358
  /**
359
   * @var array
360
   */
361
  private static $utf8MSWord = array(
362
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
363
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
364
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
365
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
366
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
367
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
368
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
369
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
370
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
371
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
372
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
373
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
374
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
375
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
376
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
377
  );
378
379
  /**
380
   * @var array
381
   */
382
  private static $iconvEncoding = array(
383
      'ANSI_X3.4-1968',
384
      'ANSI_X3.4-1986',
385
      'ASCII',
386
      'CP367',
387
      'IBM367',
388
      'ISO-IR-6',
389
      'ISO646-US',
390
      'ISO_646.IRV:1991',
391
      'US',
392
      'US-ASCII',
393
      'CSASCII',
394
      'UTF-8',
395
      'ISO-10646-UCS-2',
396
      'UCS-2',
397
      'CSUNICODE',
398
      'UCS-2BE',
399
      'UNICODE-1-1',
400
      'UNICODEBIG',
401
      'CSUNICODE11',
402
      'UCS-2LE',
403
      'UNICODELITTLE',
404
      'ISO-10646-UCS-4',
405
      'UCS-4',
406
      'CSUCS4',
407
      'UCS-4BE',
408
      'UCS-4LE',
409
      'UTF-16',
410
      'UTF-16BE',
411
      'UTF-16LE',
412
      'UTF-32',
413
      'UTF-32BE',
414
      'UTF-32LE',
415
      'UNICODE-1-1-UTF-7',
416
      'UTF-7',
417
      'CSUNICODE11UTF7',
418
      'UCS-2-INTERNAL',
419
      'UCS-2-SWAPPED',
420
      'UCS-4-INTERNAL',
421
      'UCS-4-SWAPPED',
422
      'C99',
423
      'JAVA',
424
      'CP819',
425
      'IBM819',
426
      'ISO-8859-1',
427
      'ISO-IR-100',
428
      'ISO8859-1',
429
      'ISO_8859-1',
430
      'ISO_8859-1:1987',
431
      'L1',
432
      'LATIN1',
433
      'CSISOLATIN1',
434
      'ISO-8859-2',
435
      'ISO-IR-101',
436
      'ISO8859-2',
437
      'ISO_8859-2',
438
      'ISO_8859-2:1987',
439
      'L2',
440
      'LATIN2',
441
      'CSISOLATIN2',
442
      'ISO-8859-3',
443
      'ISO-IR-109',
444
      'ISO8859-3',
445
      'ISO_8859-3',
446
      'ISO_8859-3:1988',
447
      'L3',
448
      'LATIN3',
449
      'CSISOLATIN3',
450
      'ISO-8859-4',
451
      'ISO-IR-110',
452
      'ISO8859-4',
453
      'ISO_8859-4',
454
      'ISO_8859-4:1988',
455
      'L4',
456
      'LATIN4',
457
      'CSISOLATIN4',
458
      'CYRILLIC',
459
      'ISO-8859-5',
460
      'ISO-IR-144',
461
      'ISO8859-5',
462
      'ISO_8859-5',
463
      'ISO_8859-5:1988',
464
      'CSISOLATINCYRILLIC',
465
      'ARABIC',
466
      'ASMO-708',
467
      'ECMA-114',
468
      'ISO-8859-6',
469
      'ISO-IR-127',
470
      'ISO8859-6',
471
      'ISO_8859-6',
472
      'ISO_8859-6:1987',
473
      'CSISOLATINARABIC',
474
      'ECMA-118',
475
      'ELOT_928',
476
      'GREEK',
477
      'GREEK8',
478
      'ISO-8859-7',
479
      'ISO-IR-126',
480
      'ISO8859-7',
481
      'ISO_8859-7',
482
      'ISO_8859-7:1987',
483
      'ISO_8859-7:2003',
484
      'CSISOLATINGREEK',
485
      'HEBREW',
486
      'ISO-8859-8',
487
      'ISO-IR-138',
488
      'ISO8859-8',
489
      'ISO_8859-8',
490
      'ISO_8859-8:1988',
491
      'CSISOLATINHEBREW',
492
      'ISO-8859-9',
493
      'ISO-IR-148',
494
      'ISO8859-9',
495
      'ISO_8859-9',
496
      'ISO_8859-9:1989',
497
      'L5',
498
      'LATIN5',
499
      'CSISOLATIN5',
500
      'ISO-8859-10',
501
      'ISO-IR-157',
502
      'ISO8859-10',
503
      'ISO_8859-10',
504
      'ISO_8859-10:1992',
505
      'L6',
506
      'LATIN6',
507
      'CSISOLATIN6',
508
      'ISO-8859-11',
509
      'ISO8859-11',
510
      'ISO_8859-11',
511
      'ISO-8859-13',
512
      'ISO-IR-179',
513
      'ISO8859-13',
514
      'ISO_8859-13',
515
      'L7',
516
      'LATIN7',
517
      'ISO-8859-14',
518
      'ISO-CELTIC',
519
      'ISO-IR-199',
520
      'ISO8859-14',
521
      'ISO_8859-14',
522
      'ISO_8859-14:1998',
523
      'L8',
524
      'LATIN8',
525
      'ISO-8859-15',
526
      'ISO-IR-203',
527
      'ISO8859-15',
528
      'ISO_8859-15',
529
      'ISO_8859-15:1998',
530
      'LATIN-9',
531
      'ISO-8859-16',
532
      'ISO-IR-226',
533
      'ISO8859-16',
534
      'ISO_8859-16',
535
      'ISO_8859-16:2001',
536
      'L10',
537
      'LATIN10',
538
      'KOI8-R',
539
      'CSKOI8R',
540
      'KOI8-U',
541
      'KOI8-RU',
542
      'CP1250',
543
      'MS-EE',
544
      'WINDOWS-1250',
545
      'CP1251',
546
      'MS-CYRL',
547
      'WINDOWS-1251',
548
      'CP1252',
549
      'MS-ANSI',
550
      'WINDOWS-1252',
551
      'CP1253',
552
      'MS-GREEK',
553
      'WINDOWS-1253',
554
      'CP1254',
555
      'MS-TURK',
556
      'WINDOWS-1254',
557
      'CP1255',
558
      'MS-HEBR',
559
      'WINDOWS-1255',
560
      'CP1256',
561
      'MS-ARAB',
562
      'WINDOWS-1256',
563
      'CP1257',
564
      'WINBALTRIM',
565
      'WINDOWS-1257',
566
      'CP1258',
567
      'WINDOWS-1258',
568
      '850',
569
      'CP850',
570
      'IBM850',
571
      'CSPC850MULTILINGUAL',
572
      '862',
573
      'CP862',
574
      'IBM862',
575
      'CSPC862LATINHEBREW',
576
      '866',
577
      'CP866',
578
      'IBM866',
579
      'CSIBM866',
580
      'MAC',
581
      'MACINTOSH',
582
      'MACROMAN',
583
      'CSMACINTOSH',
584
      'MACCENTRALEUROPE',
585
      'MACICELAND',
586
      'MACCROATIAN',
587
      'MACROMANIA',
588
      'MACCYRILLIC',
589
      'MACUKRAINE',
590
      'MACGREEK',
591
      'MACTURKISH',
592
      'MACHEBREW',
593
      'MACARABIC',
594
      'MACTHAI',
595
      'HP-ROMAN8',
596
      'R8',
597
      'ROMAN8',
598
      'CSHPROMAN8',
599
      'NEXTSTEP',
600
      'ARMSCII-8',
601
      'GEORGIAN-ACADEMY',
602
      'GEORGIAN-PS',
603
      'KOI8-T',
604
      'CP154',
605
      'CYRILLIC-ASIAN',
606
      'PT154',
607
      'PTCP154',
608
      'CSPTCP154',
609
      'KZ-1048',
610
      'RK1048',
611
      'STRK1048-2002',
612
      'CSKZ1048',
613
      'MULELAO-1',
614
      'CP1133',
615
      'IBM-CP1133',
616
      'ISO-IR-166',
617
      'TIS-620',
618
      'TIS620',
619
      'TIS620-0',
620
      'TIS620.2529-1',
621
      'TIS620.2533-0',
622
      'TIS620.2533-1',
623
      'CP874',
624
      'WINDOWS-874',
625
      'VISCII',
626
      'VISCII1.1-1',
627
      'CSVISCII',
628
      'TCVN',
629
      'TCVN-5712',
630
      'TCVN5712-1',
631
      'TCVN5712-1:1993',
632
      'ISO-IR-14',
633
      'ISO646-JP',
634
      'JIS_C6220-1969-RO',
635
      'JP',
636
      'CSISO14JISC6220RO',
637
      'JISX0201-1976',
638
      'JIS_X0201',
639
      'X0201',
640
      'CSHALFWIDTHKATAKANA',
641
      'ISO-IR-87',
642
      'JIS0208',
643
      'JIS_C6226-1983',
644
      'JIS_X0208',
645
      'JIS_X0208-1983',
646
      'JIS_X0208-1990',
647
      'X0208',
648
      'CSISO87JISX0208',
649
      'ISO-IR-159',
650
      'JIS_X0212',
651
      'JIS_X0212-1990',
652
      'JIS_X0212.1990-0',
653
      'X0212',
654
      'CSISO159JISX02121990',
655
      'CN',
656
      'GB_1988-80',
657
      'ISO-IR-57',
658
      'ISO646-CN',
659
      'CSISO57GB1988',
660
      'CHINESE',
661
      'GB_2312-80',
662
      'ISO-IR-58',
663
      'CSISO58GB231280',
664
      'CN-GB-ISOIR165',
665
      'ISO-IR-165',
666
      'ISO-IR-149',
667
      'KOREAN',
668
      'KSC_5601',
669
      'KS_C_5601-1987',
670
      'KS_C_5601-1989',
671
      'CSKSC56011987',
672
      'EUC-JP',
673
      'EUCJP',
674
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
675
      'CSEUCPKDFMTJAPANESE',
676
      'MS_KANJI',
677
      'SHIFT-JIS',
678
      'SHIFT_JIS',
679
      'SJIS',
680
      'CSSHIFTJIS',
681
      'CP932',
682
      'ISO-2022-JP',
683
      'CSISO2022JP',
684
      'ISO-2022-JP-1',
685
      'ISO-2022-JP-2',
686
      'CSISO2022JP2',
687
      'CN-GB',
688
      'EUC-CN',
689
      'EUCCN',
690
      'GB2312',
691
      'CSGB2312',
692
      'GBK',
693
      'CP936',
694
      'MS936',
695
      'WINDOWS-936',
696
      'GB18030',
697
      'ISO-2022-CN',
698
      'CSISO2022CN',
699
      'ISO-2022-CN-EXT',
700
      'HZ',
701
      'HZ-GB-2312',
702
      'EUC-TW',
703
      'EUCTW',
704
      'CSEUCTW',
705
      'BIG-5',
706
      'BIG-FIVE',
707
      'BIG5',
708
      'BIGFIVE',
709
      'CN-BIG5',
710
      'CSBIG5',
711
      'CP950',
712
      'BIG5-HKSCS:1999',
713
      'BIG5-HKSCS:2001',
714
      'BIG5-HKSCS',
715
      'BIG5-HKSCS:2004',
716
      'BIG5HKSCS',
717
      'EUC-KR',
718
      'EUCKR',
719
      'CSEUCKR',
720
      'CP949',
721
      'UHC',
722
      'CP1361',
723
      'JOHAB',
724
      'ISO-2022-KR',
725
      'CSISO2022KR',
726
      'CP856',
727
      'CP922',
728
      'CP943',
729
      'CP1046',
730
      'CP1124',
731
      'CP1129',
732
      'CP1161',
733
      'IBM-1161',
734
      'IBM1161',
735
      'CSIBM1161',
736
      'CP1162',
737
      'IBM-1162',
738
      'IBM1162',
739
      'CSIBM1162',
740
      'CP1163',
741
      'IBM-1163',
742
      'IBM1163',
743
      'CSIBM1163',
744
      'DEC-KANJI',
745
      'DEC-HANYU',
746
      '437',
747
      'CP437',
748
      'IBM437',
749
      'CSPC8CODEPAGE437',
750
      'CP737',
751
      'CP775',
752
      'IBM775',
753
      'CSPC775BALTIC',
754
      '852',
755
      'CP852',
756
      'IBM852',
757
      'CSPCP852',
758
      'CP853',
759
      '855',
760
      'CP855',
761
      'IBM855',
762
      'CSIBM855',
763
      '857',
764
      'CP857',
765
      'IBM857',
766
      'CSIBM857',
767
      'CP858',
768
      '860',
769
      'CP860',
770
      'IBM860',
771
      'CSIBM860',
772
      '861',
773
      'CP-IS',
774
      'CP861',
775
      'IBM861',
776
      'CSIBM861',
777
      '863',
778
      'CP863',
779
      'IBM863',
780
      'CSIBM863',
781
      'CP864',
782
      'IBM864',
783
      'CSIBM864',
784
      '865',
785
      'CP865',
786
      'IBM865',
787
      'CSIBM865',
788
      '869',
789
      'CP-GR',
790
      'CP869',
791
      'IBM869',
792
      'CSIBM869',
793
      'CP1125',
794
      'EUC-JISX0213',
795
      'SHIFT_JISX0213',
796
      'ISO-2022-JP-3',
797
      'BIG5-2003',
798
      'ISO-IR-230',
799
      'TDS565',
800
      'ATARI',
801
      'ATARIST',
802
      'RISCOS-LATIN1',
803
  );
804
805
  /**
806
   * @var array
807 1
   */
808
  private static $support = array();
809 1
810 1
  /**
   * Creating an instance runs the environment support detection.
   *
   * @see UTF8::checkForSupport()
   */
  public function __construct()
  {
    // delegate to the static detector; it guards itself against repeated runs
    self::checkForSupport();
  }
817
818
  /**
   * Multi-byte aware counterpart of "$str[$pos]": fetch one character by position.
   *
   * @param string $str <p>The UTF-8 string to read from.</p>
   * @param int    $pos <p>Position of the wanted character, negative positions are rejected.</p>
   *
   * @return string <p>The single (multi-byte) character as returned by UTF8::substr(),
   *                or an empty string for empty input or a negative position.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;
    $offset = (int)$pos;

    // an empty string has no characters and negative positions are not supported
    if ($input === '' || $offset < 0) {
      return '';
    }

    return self::substr($input, $offset, 1);
  }
841
842
  /**
   * Make sure the given string starts with a BOM by prepending the UTF-8 BOM when needed.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is returned untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The input string, prefixed with the UTF-8 BOM if it had none.</p>
   */
  public static function add_bom_to_string($str)
  {
    // already carries a BOM -> hand the input back unchanged
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
859
860 2
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The input is read as one big-endian binary number: leading zero bits carry
   * no value, and the remaining bits are left-padded to whole bytes.
   *
   * INFO: The conversion works byte by byte. The former "base_convert()" based
   *       approach went through a float and therefore corrupted inputs longer
   *       than roughly 53 bits, and it misaligned the result whenever the value
   *       had an odd number of hex digits (e.g. "100000001" became "\x10\x10").
   *
   * @param mixed $bin <p>String consisting of "1" and "0" characters; any other
   *                   character is ignored, like "base_convert()" did.</p>
   *
   * @return string <p>The decoded bytes, an empty string for empty or non-string
   *                input and a single NUL byte when the value is zero.</p>
   */
  public static function binary_to_str($bin)
  {
    // same outcome as the old "isset($bin[0])" guard for empty strings and integers
    if (!is_string($bin) || $bin === '') {
      return '';
    }

    // characters that are not binary digits never contributed to the value
    $bits = preg_replace('/[^01]/', '', $bin);

    // only the numeric value matters; a zero value still yields one NUL byte
    $bits = ltrim($bits, '0');
    if ($bits === '') {
      $bits = '0';
    }

    // left-pad to a whole number of bytes so every chunk maps to exactly one byte
    $padding = (8 - strlen($bits) % 8) % 8;
    $bits = str_repeat('0', $padding) . $bits;

    $str = '';
    foreach (str_split($bits, 8) as $byte) {
      // unqualified chr() is PHP's native single-byte function here
      $str .= chr(bindec($byte));
    }

    return $str;
  }
875
876 1
  /**
   * Get the Byte Order Mark for UTF-8 (the three bytes EF BB BF).
   *
   * INFO: UTF8::$bom lists further BOM values, e.g. for UTF-16 and UTF-32.
   *
   * @return string <p>The UTF-8 Byte Order Mark.</p>
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
887
888 1
  /**
   * Alias of UTF8::chr_map(): forwards both arguments unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback handed on to UTF8::chr_map().</p>
   * @param string       $str      <p>The string handed on to UTF8::chr_map().</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
902
903
  /**
   * Detect once which UTF-8 related PHP features are available and remember
   * the result in UTF8::$support.
   *
   * INFO: There is no need to call this manually, the class triggers it on demand.
   */
  public static function checkForSupport()
  {
    // the detection already ran -> keep the stored results
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // set the marker first, so the probes below run only once
    self::$support['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // PCRE with UTF-8: http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
930 6
931
  /**
   * Build the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default encoding the work is handed to "IntlChar::chr()" when the
   * support check reports it as available. Otherwise the UTF-8 byte sequence
   * is assembled by hand, converted to the requested encoding if that is not
   * UTF-8, and kept in a static cache.
   *
   * @param int    $code_point <p>The code point to convert, it has to be a real integer.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, null if $code_point is not an integer
   *                     (or if "IntlChar::chr()" itself returns null).</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // anything that is not already an integer (e.g. "65" or 65.0) is rejected
    if ((int)$code_point !== $code_point) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'UTF-8') {
      if (self::$support['intlChar'] === true) {
        return \IntlChar::chr($code_point);
      }
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // static cache for the hand-made conversion below
    static $memo = array();
    $memoKey = $code_point . $encoding;
    if (isset($memo[$memoKey])) {
      return $memo[$memoKey];
    }

    // wrap the value at 0x200000, i.e. the 21 bits a four byte sequence can carry
    $cp = $code_point % 0x200000;

    if ($cp < 0x80) {
      // one byte: 0xxxxxxx
      $char = chr($cp);
    } elseif ($cp < 0x800) {
      // two bytes: 110xxxxx 10xxxxxx
      $char = chr(0xC0 | ($cp >> 6))
              . chr(0x80 | ($cp & 0x3F));
    } elseif ($cp < 0x10000) {
      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $char = chr(0xE0 | ($cp >> 12))
              . chr(0x80 | (($cp >> 6) & 0x3F))
              . chr(0x80 | ($cp & 0x3F));
    } else {
      // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $char = chr(0xF0 | ($cp >> 18))
              . chr(0x80 | (($cp >> 12) & 0x3F))
              . chr(0x80 | (($cp >> 6) & 0x3F))
              . chr(0x80 | ($cp & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $char = \mb_convert_encoding($char, $encoding, 'UTF-8');
    }

    // remember the result for the next call
    $memo[$memoKey] = $char;

    return $char;
  }
990
991
  /**
992
   * Applies callback to all characters of a string.
993
   *
994
   * @param string|array $callback <p>The callback function.</p>
995
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
996
   *
997
   * @return array <p>The outcome of callback.</p>
998 2
   */
999
  public static function chr_map($callback, $str)
  {
    // split the string first, then hand every resulting element to the callback
    return array_map($callback, self::split($str));
  }
1005
1006 2
  /**
1007
   * Generates an array of byte length of each character of a Unicode string.
1008
   *
1009 2
   * 1 byte => U+0000  - U+007F
1010
   * 2 byte => U+0080  - U+07FF
1011 2
   * 3 byte => U+0800  - U+FFFF
1012 2
   * 4 byte => U+10000 - U+10FFFF
1013 2
   *
1014
   * @param string $str <p>The original Unicode string.</p>
1015 1
   *
1016 1
   * @return array <p>An array of byte lengths of each character.</p>
1017 1
   */
1018
  public static function chr_size_list($str)
  {
    $input = (string)$str;

    // nothing to measure for an empty string
    if ($input === '') {
      return array();
    }

    // byte length (strlen) of every element returned by split(), keys preserved
    $sizes = array();
    foreach (self::split($input) as $index => $character) {
      $sizes[$index] = strlen($character);
    }

    return $sizes;
  }
1028 2
1029
  /**
1030
   * Get a decimal code representation of a specific character.
1031
   *
1032
   * @param string $char <p>The input character.</p>
1033
   *
1034
   * @return int
1035
   */
1036
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // Guard: an empty input has no lead byte. Reading $char[0] anyway would
    // raise an "Uninitialized string offset" notice/warning, so 0 is returned.
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx => single byte, the byte value is the code point
      return $code;
    }

    // The lead byte announces the sequence length; strip its marker bits so
    // that only the payload bits remain in $code.
    $bytes = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }
    // any other lead byte keeps $bytes at 1 and is returned unchanged below

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx => append the payload of each continuation byte.
      // A truncated sequence (byte missing) contributes no payload bits
      // instead of triggering an undefined-offset notice/warning.
      $payload = isset($char[$i]) ? (self::ord($char[$i]) & ~0x80) : 0;
      $code = ($code << 6) + $payload;
    }

    return $code;
  }
1068
1069
  /**
1070
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1071 44
   *
1072
   * @param string $char <p>The input character</p>
1073
   * @param string $pfix [optional]
1074
   *
1075
   * @return string <p>The code point encoded as U+xxxx</p>
1076
   */
1077
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    // the literal entity "&#0;" is handled like an empty input
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
1085
1086 44
  /**
1087 44
   * alias for "UTF8::chr_to_decimal()"
1088
   *
1089 44
   * @see UTF8::chr_to_decimal()
1090 44
   *
1091
   * @param string $chr
1092 44
   *
1093 17
   * @return int
1094 17
   */
1095
  public static function chr_to_int($chr)
  {
    // pure alias: the argument is forwarded untouched
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1099
1100 44
  /**
1101 5
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1102 5
   *
1103
   * @param string $body     <p>The original string to be split.</p>
1104 44
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1105
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1106
   *
1107
   * @return string <p>The chunked string</p>
1108
   */
1109
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    // $end is used as glue, so it appears between chunks only
    // (it is not appended after the last chunk)
    return implode($end, $chunks);
  }
1113
1114 4
  /**
1115
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1116 4
   *
1117
   * @param string $str                     <p>The string to be sanitized.</p>
1118 4
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1119 1
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1120
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1121
   *                                        => "..."</p>
1122
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1123 4
   *                                        $normalize_whitespace</p>
1124
   *
1125
   * @return string <p>Clean UTF-8 encoded string.</p>
1126
   */
1127
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // Pattern taken from:
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // (original note: caused connection reset problem on larger strings)
    //
    // Group 1 captures runs of well-formed 1-4 byte sequences, groups 2 and 3
    // match single stray bytes. Replacing every match with '$1' keeps the
    // well-formed runs and drops the stray bytes.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // mandatory steps, always in this order
    $cleaned = preg_replace($utf8Pattern, '$1', $str);
    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // optional steps, each one only runs for a strict boolean true
    if (true === $normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if (true === $remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1162
1163 1
  /**
1164 1
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
1165 1
   *
1166
   * @param string $str <p>The input string.</p>
1167 1
   *
1168 1
   * @return string
1169
   */
1170 5
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // step 1: fix ISO <-> UTF-8 errors
    $fixed = self::fix_simple_utf8($input);

    // step 2: clean() with these switches:
    //   remove_bom              = true
    //   normalize_whitespace    = true
    //   normalize_msword        = false
    //   keep_non_breaking_space = true
    // clean() itself also removes non-UTF-8 symbols, the diamond question
    // mark (�) and invisible characters (e.g. "\0").
    return (string)self::clean($fixed, true, true, false, true);
  }
1190
1191
  /**
1192
   * Accepts a string or a array of strings and returns an array of Unicode code points.
1193
   *
1194 1
   * INFO: opposite to UTF8::string()
1195
   *
1196 1
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1197 1
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1198 1
   *                                    default, code points will be returned as integers.</p>
1199
   *
1200 1
   * @return array <p>The array of code points.</p>
1201
   */
1202
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split first; anything else is mapped as given
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $callbackClass = '\\voku\\helper\\UTF8';

    // every element => result of UTF8::ord()
    $points = array_map(array($callbackClass, 'ord'), $chars);

    if ($u_style) {
      // every code point => result of UTF8::int_to_hex() (default prefix)
      $points = array_map(array($callbackClass, 'int_to_hex'), $points);
    }

    return $points;
  }
1228
1229 11
  /**
1230
   * Returns count of characters used in a string.
1231
   *
1232
   * @param string $str       <p>The input string.</p>
1233 11
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
1234
   *
1235
   * @return array <p>An associative array of Character as keys and
1236 11
   *               their count as values.</p>
1237
   */
1238 1
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split into chunks of length 1, then count how often each chunk occurs
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1242
1243 11
  /**
1244
   * Converts a int-value into an UTF-8 character.
1245
   *
1246 11
   * @param int $int
1247 1
   *
1248 1
   * @return string
1249 1
   */
1250 11
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    // build the numeric entity for the value and let the decoder resolve it
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1260
1261
  /**
1262 2
   * Encode a string with a new charset-encoding.
1263 1
   *
1264
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1265
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
1266 2
   *
1267 2
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1268 2
   * @param string $str      <p>The input string</p>
1269
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1270 2
   *                         /> otherwise we auto-detect the current string-encoding</p>
1271
   *
1272 2
   * @return string
1273 2
   */
1274
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do for an empty string or an empty target encoding
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Detection failed (false / empty): there is no source encoding to
    // convert from, so the input is handed back unchanged.
    if (!is_string($encodingDetected) || $encodingDetected === '') {
      return $str;
    }

    // Without $force, a string that is already detected as the target
    // encoding is left alone.
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // Target UTF-8: use the library's own converter when forced or when the
    // source is one of the encodings listed here.
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // Target ISO-8859-1: same idea with the dedicated converter.
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // Warn (but still try) when mbstring support is flagged as missing.
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Strict check instead of truthiness: a successfully converted "0" is a
    // valid result and must not be mistaken for a failed conversion.
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    // conversion failed or produced nothing => fall back to the input
    return $str;
  }
1351
1352
  /**
1353
   * Reads entire file into a string.
1354
   *
1355
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1356
   *
1357
   * @link http://php.net/manual/en/function.file-get-contents.php
1358
   *
1359
   * @param string        $filename      <p>
1360
   *                                     Name of the file to read.
1361
   *                                     </p>
1362 2
   * @param int|null      $flags         [optional] <p>
1363
   *                                     Prior to PHP 6, this parameter is called
1364
   *                                     use_include_path and is a bool.
1365 2
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1366 2
   *                                     to trigger include path
1367
   *                                     search.
1368 2
   *                                     </p>
1369 2
   *                                     <p>
1370
   *                                     The value of flags can be any combination of
1371
   *                                     the following flags (with some restrictions), joined with the
1372
   *                                     binary OR (|)
1373 2
   *                                     operator.
1374 2
   *                                     </p>
1375
   *                                     <p>
1376 2
   *                                     <table>
1377 2
   *                                     Available flags
1378
   *                                     <tr valign="top">
1379 2
   *                                     <td>Flag</td>
1380 1
   *                                     <td>Description</td>
1381 1
   *                                     </tr>
1382 2
   *                                     <tr valign="top">
1383
   *                                     <td>
1384
   *                                     FILE_USE_INCLUDE_PATH
1385
   *                                     </td>
1386 2
   *                                     <td>
1387 1
   *                                     Search for filename in the include directory.
1388
   *                                     See include_path for more
1389
   *                                     information.
1390 1
   *                                     </td>
1391 1
   *                                     </tr>
1392 1
   *                                     <tr valign="top">
1393 1
   *                                     <td>
1394
   *                                     FILE_TEXT
1395 1
   *                                     </td>
1396
   *                                     <td>
1397
   *                                     As of PHP 6, the default encoding of the read
1398
   *                                     data is UTF-8. You can specify a different encoding by creating a
1399
   *                                     custom context or by changing the default using
1400
   *                                     stream_default_encoding. This flag cannot be
1401
   *                                     used with FILE_BINARY.
1402
   *                                     </td>
1403
   *                                     </tr>
1404
   *                                     <tr valign="top">
1405 1
   *                                     <td>
1406
   *                                     FILE_BINARY
1407 1
   *                                     </td>
1408
   *                                     <td>
1409
   *                                     With this flag, the file is read in binary mode. This is the default
1410
   *                                     setting and cannot be used with FILE_TEXT.
1411
   *                                     </td>
1412
   *                                     </tr>
1413
   *                                     </table>
1414
   *                                     </p>
1415
   * @param resource|null $context       [optional] <p>
1416
   *                                     A valid context resource created with
1417
   *                                     stream_context_create. If you don't need to use a
1418
   *                                     custom context, you can skip this parameter by &null;.
1419 9
   *                                     </p>
1420
   * @param int|null      $offset        [optional] <p>
1421 9
   *                                     The offset where the reading starts.
1422 9
   *                                     </p>
1423 3
   * @param int|null      $maxlen        [optional] <p>
1424
   *                                     Maximum length of data read. The default is to read until end
1425 3
   *                                     of file is reached.
1426 3
   *                                     </p>
1427 3
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1428 9
   *
1429 2
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1430 2
   *                                     or pdf, because they used non default utf-8 chars</p>
1431 2
   *
1432 2
   * @return string|false <p>The function returns the read data or false on failure.</p>
1433 9
   */
1434
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and may
    // alter characters in the file name; kept here to preserve behavior.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // only build a timeout context when the caller did not bring its own;
    // the timeout is set for the "http" wrapper only
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function expects a bool and an int here. The "null" defaults
    // of this wrapper were coerced implicitly (deprecated since PHP 8.1), so
    // the same conversion is now done explicitly: null => false / 0.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    // $maxlen is only forwarded when it is a real integer
    if (is_int($maxlen) === true) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // encode() is called with $force = false, then cleanup() is applied
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1469
1470
  /**
1471
   * Checks if a file starts with BOM (Byte Order Mark) character.
1472
   *
1473
   * @param string $file_path <p>Path to a valid file.</p>
1474
   *
1475
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1476
   */
1477
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // A file that could not be read cannot be said to start with a BOM;
    // report false instead of passing the failure value on.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1481
1482
  /**
1483
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1484
   *
1485
   * @param mixed  $var
1486
   * @param int    $normalization_form
1487
   * @param string $leading_combining
1488
   *
1489
   * @return mixed
1490
   */
1491
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {

      // recurse into every element, keys are kept
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

    } elseif ($type === 'object') {

      // recurse into every property the foreach can see
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

    } elseif ($type === 'string') {

      // Workaround https://bugs.php.net/65732 => unify line endings to "\n"
      if (strpos($var, "\r") !== false) {
        $var = str_replace(array("\r\n", "\r"), "\n", $var);
      }

      // pure ASCII needs no normalization at all
      if (self::is_ascii($var) === false) {

        /** @noinspection PhpUndefinedClassInspection */
        $alreadyNormalized = \Normalizer::isNormalized($var, $normalization_form);

        if ($alreadyNormalized) {
          // non-empty marker: lets the isset() check further down pass
          $normalized = '-';
        } else {
          /** @noinspection PhpUndefinedClassInspection */
          $normalized = \Normalizer::normalize($var, $normalization_form);

          // an unusable normalizer result => fall back to re-encoding as UTF-8
          $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
        }

        // Prevent leading combining chars for NFC-safe concatenations.
        if (
            $var[0] >= "\x80"
            &&
            isset($normalized[0], $leading_combining[0])
            &&
            preg_match('/^\p{Mn}/u', $var)
        ) {
          $var = $leading_combining . $var;
        }
      }
    }

    // every other type is returned untouched
    return $var;
  }
1543 1
1544 1
  /**
1545 1
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1546
   *
1547
   * Gets a specific external variable by name and optionally filters it
1548 1
   *
1549
   * @link  http://php.net/manual/en/function.filter-input.php
1550
   *
1551
   * @param int    $type          <p>
1552
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1553
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1554
   *                              <b>INPUT_ENV</b>.
1555
   *                              </p>
1556
   * @param string $variable_name <p>
1557
   *                              Name of a variable to get.
1558
   *                              </p>
1559 1
   * @param int    $filter        [optional] <p>
1560
   *                              The ID of the filter to apply. The
1561 1
   *                              manual page lists the available filters.
1562
   *                              </p>
1563
   * @param mixed  $options       [optional] <p>
1564
   *                              Associative array of options or bitwise disjunction of flags. If filter
1565
   *                              accepts options, flags can be provided in "flags" field of array.
1566
   *                              </p>
1567
   *
1568
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1569
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1570
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1571
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1572
   * @since 5.2.0
1573
   */
1574 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually supplied it
    $raw = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($raw);
  }
1584 7
1585 2
  /**
1586
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1587
   *
1588 7
   * Gets external variables and optionally filters them
1589 1
   *
1590 1
   * @link  http://php.net/manual/en/function.filter-input-array.php
1591 1
   *
1592
   * @param int   $type       <p>
1593 7
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1594
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1595
   *                          <b>INPUT_ENV</b>.
1596
   *                          </p>
1597
   * @param mixed $definition [optional] <p>
1598
   *                          An array defining the arguments. A valid key is a string
1599
   *                          containing a variable name and a valid value is either a filter type, or an array
1600
   *                          optionally specifying the filter, flags and options. If the value is an
1601
   *                          array, valid keys are filter which specifies the
1602
   *                          filter type,
1603 1
   *                          flags which specifies any flags that apply to the
1604
   *                          filter, and options which specifies any options that
1605 1
   *                          apply to the filter. See the example below for a better understanding.
1606
   *                          </p>
1607 1
   *                          <p>
1608
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1609
   *                          input array are filtered by this filter.
1610 1
   *                          </p>
1611 1
   * @param bool  $add_empty  [optional] <p>
1612
   *                          Add missing keys as <b>NULL</b> to the return value.
1613 1
   *                          </p>
1614
   *
1615
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1616 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1617 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1618 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1619 1
   * fails.
1620 1
   * @since 5.2.0
1621
   */
1622 1 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // called with $type only => use the native defaults for the rest
    $raw = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($raw);
  }
1632 1
1633
  /**
1634 1
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1635
   *
1636
   * Filters a variable with a specified filter
1637
   *
1638 1
   * @link  http://php.net/manual/en/function.filter-var.php
1639
   *
1640
   * @param mixed $variable <p>
1641
   *                        Value to filter.
1642
   *                        </p>
1643
   * @param int   $filter   [optional] <p>
1644
   *                        The ID of the filter to apply. The
1645
   *                        manual page lists the available filters.
1646
   *                        </p>
1647
   * @param mixed $options  [optional] <p>
1648
   *                        Associative array of options or bitwise disjunction of flags. If filter
1649
   *                        accepts options, flags can be provided in "flags" field of array. For
1650
   *                        the "callback" filter, callable type should be passed. The
1651
   *                        callback must accept one argument, the value to be filtered, and return
1652
   *                        the value after filtering/sanitizing it.
1653
   *                        </p>
1654 1
   *                        <p>
1655
   *                        <code>
1656 1
   *                        // for filters that accept options, use this format
1657 1
   *                        $options = array(
1658
   *                        'options' => array(
1659
   *                        'default' => 3, // value to return if the filter fails
1660 1
   *                        // other options here
1661
   *                        'min_range' => 0
1662 1
   *                        ),
1663 1
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1664 1
   *                        );
1665 1
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1666 1
   *                        // for filter that only accept flags, you can pass them directly
1667 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1668 1
   *                        // for filter that only accept flags, you can also pass as an array
1669 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1670 1
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1671 1
   *                        // callback validate filter
1672 1
   *                        function foo($value)
1673
   *                        {
1674
   *                        // Expected format: Surname, GivenNames
1675
   *                        if (strpos($value, ", ") === false) return false;
1676
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1677
   *                        $empty = (empty($surname) || empty($givennames));
1678
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1679
   *                        if ($empty || $notstrings) {
1680
   *                        return false;
1681
   *                        } else {
1682
   *                        return $value;
1683
   *                        }
1684
   *                        }
1685
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1686
   *                        </code>
1687
   *                        </p>
1688
   *
1689
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1690
   * @since 5.2.0
1691
   */
1692 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Hand "$options" to the native function only when the caller really supplied a
    // third argument; otherwise the native filter_var() applies its own default.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // post-process the filtered value via UTF8::filter()
    return self::filter($filtered);
  }
1702
1703
  /**
   * "filter_var_array()"-wrapper which passes the result through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets multiple variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>An array with string keys containing the data to filter.</p>
   * @param mixed $definition [optional] <p>
   *                          Either an array defining the arguments, or an integer holding a filter
   *                          constant (then all values in the input array are filtered by this filter).
   *                          In the array form a valid key is a string containing a variable name and
   *                          a valid value is either a filter type or an array with the optional keys
   *                          "filter" (the filter type), "flags" (flags that apply to the filter) and
   *                          "options" (options that apply to the filter).
   *                          </p>
   * @param bool  $add_empty  [optional] <p>Add missing keys as <b>NULL</b> to the return value.</p>
   *
   * @return mixed <p>An array containing the values of the requested variables on success, or
   *               <b>FALSE</b> on failure. An array value will be <b>FALSE</b> if the filter fails,
   *               or <b>NULL</b> if the variable is not set.</p>
   * @since 5.2.0
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without "$definition" and
    // "$add_empty", so that it can apply its own defaults.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // post-process the filtered values via UTF8::filter()
    return self::filter($filtered);
  }
1747
1748
  /**
   * Check whether a string has no more characters than the given limit.
   *
   * The length is measured with UTF8::strlen().
   *
   * @param string $str      <p>The original string to be checked.</p>
   * @param int    $box_size <p>The maximum number of chars the string may have.</p>
   *
   * @return bool <p>true if the string length is less than or equal to $box_size, false otherwise.</p>
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1760
1761
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Use this function if you received an UTF-8 string that was converted from Windows-1252
   * as if it was ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every key of the "brokenUtf8ToUtf8" map replaced by its value.</p>
   */
  public static function fix_simple_utf8($str)
  {
    // lookup lists, built once on the first call and then re-used
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1793 1
1794
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The value is passed through "UTF8::utf8_decode()" and "UTF8::to_utf8()" again and again
   * until a round no longer changes it. Arrays are processed element by element (recursively),
   * the keys are kept.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return mixed <p>The fixed string, or the array with every element fixed.</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {

      foreach (array_keys($str) as $key) {
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    // an empty string is already "stable", so the loop is skipped for it
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    }

    return $str;
  }
1825
1826 2
  /**
   * Get the text direction of a specific character.
   *
   * If the "intlChar" support flag is set, "IntlChar::charDirection()" is asked first. When it is
   * not available, or when it reports a direction code that is neither in the LTR nor in the RTL
   * list, the code point of the character is checked against a hard-coded list of RTL code points.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      // direction codes from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      $direction = \IntlChar::charDirection($char);

      if (in_array($direction, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlCodes, true)) {
        return 'RTL';
      }

      // every other direction code is resolved via the code point list below
    }

    $c = self::chr_to_decimal($char);

    // no RTL code point is listed outside of 0x5be ... 0x10b7f
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    // first group: 0x5be ... 0x85e
    if ($c <= 0x85e) {

      $isRtl = (
          $c === 0x5be ||
          $c === 0x5c0 ||
          $c === 0x5c3 ||
          $c === 0x5c6 ||
          ($c >= 0x5d0 && $c <= 0x5ea) ||
          ($c >= 0x5f0 && $c <= 0x5f4) ||
          $c === 0x608 ||
          $c === 0x60b ||
          $c === 0x60d ||
          $c === 0x61b ||
          ($c >= 0x61e && $c <= 0x64a) ||
          ($c >= 0x66d && $c <= 0x66f) ||
          ($c >= 0x671 && $c <= 0x6d5) ||
          ($c >= 0x6e5 && $c <= 0x6e6) ||
          ($c >= 0x6ee && $c <= 0x6ef) ||
          ($c >= 0x6fa && $c <= 0x70d) ||
          $c === 0x710 ||
          ($c >= 0x712 && $c <= 0x72f) ||
          ($c >= 0x74d && $c <= 0x7a5) ||
          $c === 0x7b1 ||
          ($c >= 0x7c0 && $c <= 0x7ea) ||
          ($c >= 0x7f4 && $c <= 0x7f5) ||
          $c === 0x7fa ||
          ($c >= 0x800 && $c <= 0x815) ||
          $c === 0x81a ||
          $c === 0x824 ||
          $c === 0x828 ||
          ($c >= 0x830 && $c <= 0x83e) ||
          ($c >= 0x840 && $c <= 0x858) ||
          $c === 0x85e
      );

      return $isRtl ? 'RTL' : 'LTR';
    }

    // single code point between the two groups
    if ($c === 0x200f) {
      return 'RTL';
    }

    if ($c < 0xfb1d) {
      return 'LTR';
    }

    // second group: 0xfb1d ... 0x10b7f
    $isRtl = (
        $c === 0xfb1d ||
        ($c >= 0xfb1f && $c <= 0xfb28) ||
        ($c >= 0xfb2a && $c <= 0xfb36) ||
        ($c >= 0xfb38 && $c <= 0xfb3c) ||
        $c === 0xfb3e ||
        ($c >= 0xfb40 && $c <= 0xfb41) ||
        ($c >= 0xfb43 && $c <= 0xfb44) ||
        ($c >= 0xfb46 && $c <= 0xfbc1) ||
        ($c >= 0xfbd3 && $c <= 0xfd3d) ||
        ($c >= 0xfd50 && $c <= 0xfd8f) ||
        ($c >= 0xfd92 && $c <= 0xfdc7) ||
        ($c >= 0xfdf0 && $c <= 0xfdfc) ||
        ($c >= 0xfe70 && $c <= 0xfe74) ||
        ($c >= 0xfe76 && $c <= 0xfefc) ||
        ($c >= 0x10800 && $c <= 0x10805) ||
        $c === 0x10808 ||
        ($c >= 0x1080a && $c <= 0x10835) ||
        ($c >= 0x10837 && $c <= 0x10838) ||
        $c === 0x1083c ||
        ($c >= 0x1083f && $c <= 0x10855) ||
        ($c >= 0x10857 && $c <= 0x1085f) ||
        ($c >= 0x10900 && $c <= 0x1091b) ||
        ($c >= 0x10920 && $c <= 0x10939) ||
        $c === 0x1093f ||
        $c === 0x10a00 ||
        ($c >= 0x10a10 && $c <= 0x10a13) ||
        ($c >= 0x10a15 && $c <= 0x10a17) ||
        ($c >= 0x10a19 && $c <= 0x10a33) ||
        ($c >= 0x10a40 && $c <= 0x10a47) ||
        ($c >= 0x10a50 && $c <= 0x10a58) ||
        ($c >= 0x10a60 && $c <= 0x10a7f) ||
        ($c >= 0x10b00 && $c <= 0x10b35) ||
        ($c >= 0x10b40 && $c <= 0x10b55) ||
        ($c >= 0x10b58 && $c <= 0x10b72) ||
        ($c >= 0x10b78 && $c <= 0x10b7f)
    );

    return $isRtl ? 'RTL' : 'LTR';
  }
1945 9
1946 9
  /**
   * Get data from "/data/*.php".
   *
   * The file is loaded via "require", so the result is whatever that file returns.
   *
   * @param string $file <p>The file name without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1963 9
1964
  /**
   * Alias for "UTF8::string_has_bom()".
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The unchanged result of "UTF8::string_has_bom()".</p>
   *
   * @deprecated
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1979
1980 9
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with "hexdec()" and the result is handed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr($hexdec)
  {
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1991
1992
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted notations (case-insensitive): "U+xxxx", "\uxxxx" or the bare digits,
   * each with 4 to 6 hexadecimal digits.
   *
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($hexdec)
  {
    if (!$hexdec) {
      return false;
    }

    // Only real hexadecimal digits are accepted: with a wider character class (e.g. "a-z")
    // an input like "zzzz" would pass the check and intval() would silently return 0.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2013
2014
  /**
   * Alias for "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Passed on unchanged.</p>
   * @param string $encoding [optional] <p>Passed on unchanged, default is UTF-8.</p>
   *
   * @return string <p>The unchanged result of "UTF8::html_entity_decode()".</p>
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2029
2030
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * If "mb_encode_numericentity()" exists it does the conversion, otherwise every character
   * of the string is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // code points below "$startCode" are not converted
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The conversion map has to consist of groups of exactly four integers
      // (start, end, offset, mask): a stray fifth element is rejected with a
      // ValueError by PHP 8 and was only ignored by older versions.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without "mbstring": encode the string char by char
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2079
2080
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here. Bummer.
   *
   * Convert all HTML entities to their applicable characters. The decoding is repeated
   * until the string does not change anymore.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to
   *                         handle quotes and which document type to use. If it is <b>NULL</b>,
   *                         ENT_QUOTES | ENT_HTML5 is used when "Bootup::is_php('5.4')" is true,
   *                         otherwise ENT_QUOTES.
   *                         <br /><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.
   *                         <br /><b>ENT_QUOTES</b>: Will convert both double and single quotes.
   *                         <br /><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.
   *                         <br /><b>ENT_HTML401</b>: Handle code as HTML 4.01.
   *                         <br /><b>ENT_XML1</b>: Handle code as XML 1.
   *                         <br /><b>ENT_XHTML</b>: Handle code as XHTML.
   *                         <br /><b>ENT_HTML5</b>: Handle code as HTML 5.
   *                         </p>
   * @param string $encoding [optional] <p>Encoding to use.</p>
   *
   * @return string <p>The decoded string.</p>
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // strings with less than four bytes are returned as they are, examples: "&;" or "&x;"
    if (!isset($str[3])) {
      return $str;
    }

    // nothing to do without "&", or without both "&#" and ";"
    $hasAmpersand = strpos($str, '&') !== false;
    $hasNumericPrefix = strpos($str, '&#') !== false;
    $hasSemicolon = strpos($str, ';') !== false;

    if (!$hasAmpersand || (!$hasNumericPrefix && !$hasSemicolon)) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      $flags = ENT_QUOTES;

      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      }
    }

    do {
      $previous = $str;

      // decimal entities with a semicolon
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            // quotes stay encoded in this step
            if ($decoded === '"' || $decoded === "'") {
              return $matches[0];
            }

            return $decoded;
          },
          $str
      );

      // append the missing semicolon to numeric entities ...
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2214
2215
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * The string is first converted by the native htmlentities(). For UTF-8 every remaining
   * character which "UTF8::chr_size_list()" reports with a size of three or more is then
   * replaced by the result of "UTF8::html_encode()" for that character.
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>The input string.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to
   *                              handle quotes, invalid code unit sequences and the used document type.
   *                              The default is ENT_COMPAT.
   *                              <br /><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.
   *                              <br /><b>ENT_QUOTES</b>: Will convert both double and single quotes.
   *                              <br /><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.
   *                              <br /><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead
   *                              of returning an empty string. Using this flag is discouraged as it may
   *                              have security implications.
   *                              <br /><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a
   *                              Unicode Replacement Character (U+FFFD) instead of returning an empty string.
   *                              <br /><b>ENT_DISALLOWED</b>: Replace invalid code points for the given
   *                              document type with a Unicode Replacement Character (U+FFFD) instead of
   *                              leaving them as is. This may be useful, for instance, to ensure the
   *                              well-formedness of XML documents with embedded external content.
   *                              <br /><b>ENT_HTML401</b>: Handle code as HTML 4.01.
   *                              <br /><b>ENT_XML1</b>: Handle code as XML 1.
   *                              <br /><b>ENT_XHTML</b>: Handle code as XHTML.
   *                              <br /><b>ENT_HTML5</b>: Handle code as HTML 5.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Defines the encoding used in the conversion. Although this argument is
   *                              technically optional, you are highly encouraged to specify the correct
   *                              value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The encoded string. If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string will be returned,
   *                unless either the <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra replacement step below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    $byteLengths = self::chr_size_list($str);
    $search = array();
    $replacements = array();
    foreach ($byteLengths as $counter => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $counter);

      // "false" signals an error: do not use it as array key or as search term
      if ($char === false) {
        continue;
      }

      // every distinct char is encoded only once
      if (!isset($replacements[$char])) {
        $search[$char] = $char;
        $replacements[$char] = self::html_encode($char);
      }
    }

    return str_replace($search, $replacements, $str);
  }
2346
2347
  /**
2348
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2349
   *
2350
   * INFO: Take a look at "UTF8::htmlentities()"
2351
   *
2352
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2353
   *
2354
   * @param string $str           <p>
2355
   *                              The string being converted.
2356
   *                              </p>
2357
   * @param int    $flags         [optional] <p>
2358
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2359 1
   *                              invalid code unit sequences and the used document type. The default is
2360
   *                              ENT_COMPAT | ENT_HTML401.
2361 1
   *                              <table>
2362
   *                              Available <i>flags</i> constants
2363
   *                              <tr valign="top">
2364
   *                              <td>Constant Name</td>
2365
   *                              <td>Description</td>
2366
   *                              </tr>
2367
   *                              <tr valign="top">
2368
   *                              <td><b>ENT_COMPAT</b></td>
2369
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2370
   *                              </tr>
2371
   *                              <tr valign="top">
2372
   *                              <td><b>ENT_QUOTES</b></td>
2373
   *                              <td>Will convert both double and single quotes.</td>
2374
   *                              </tr>
2375
   *                              <tr valign="top">
2376
   *                              <td><b>ENT_NOQUOTES</b></td>
2377
   *                              <td>Will leave both double and single quotes unconverted.</td>
2378
   *                              </tr>
2379
   *                              <tr valign="top">
2380
   *                              <td><b>ENT_IGNORE</b></td>
2381
   *                              <td>
2382
   *                              Silently discard invalid code unit sequences instead of returning
2383
   *                              an empty string. Using this flag is discouraged as it
2384
   *                              may have security implications.
2385
   *                              </td>
2386
   *                              </tr>
2387 1
   *                              <tr valign="top">
2388
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2389 1
   *                              <td>
2390
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2391
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2392
   *                              </td>
2393
   *                              </tr>
2394
   *                              <tr valign="top">
2395
   *                              <td><b>ENT_DISALLOWED</b></td>
2396
   *                              <td>
2397
   *                              Replace invalid code points for the given document type with a
2398
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2399
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2400
   *                              instance, to ensure the well-formedness of XML documents with
2401 1
   *                              embedded external content.
2402
   *                              </td>
2403 1
   *                              </tr>
2404
   *                              <tr valign="top">
2405
   *                              <td><b>ENT_HTML401</b></td>
2406
   *                              <td>
2407
   *                              Handle code as HTML 4.01.
2408
   *                              </td>
2409
   *                              </tr>
2410
   *                              <tr valign="top">
2411
   *                              <td><b>ENT_XML1</b></td>
2412
   *                              <td>
2413
   *                              Handle code as XML 1.
2414
   *                              </td>
2415
   *                              </tr>
2416 16
   *                              <tr valign="top">
2417
   *                              <td><b>ENT_XHTML</b></td>
2418 16
   *                              <td>
2419
   *                              Handle code as XHTML.
2420
   *                              </td>
2421
   *                              </tr>
2422
   *                              <tr valign="top">
2423
   *                              <td><b>ENT_HTML5</b></td>
2424
   *                              <td>
2425
   *                              Handle code as HTML 5.
2426
   *                              </td>
2427
   *                              </tr>
2428
   *                              </table>
2429
   *                              </p>
2430
   * @param string $encoding      [optional] <p>
2431 28
   *                              Defines encoding used in conversion.
2432
   *                              </p>
2433 28
   *                              <p>
2434
   *                              For the purposes of this function, the encodings
2435 28
   *                              ISO-8859-1, ISO-8859-15,
2436 5
   *                              UTF-8, cp866,
2437
   *                              cp1251, cp1252, and
2438
   *                              KOI8-R are effectively equivalent, provided the
2439 28
   *                              <i>string</i> itself is valid for the encoding, as
2440
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2441
   *                              the same positions in all of these encodings.
2442
   *                              </p>
2443
   * @param bool   $double_encode [optional] <p>
2444
   *                              When <i>double_encode</i> is turned off PHP will not
2445
   *                              encode existing html entities, the default is to convert everything.
2446
   *                              </p>
2447
   *
2448
   * @return string The converted string.
2449 1
   * </p>
2450
   * <p>
2451 1
   * If the input <i>string</i> contains an invalid code unit
2452
   * sequence within the given <i>encoding</i> an empty string
2453 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2454 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2455
   */
2456
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The default name is used as it is; every other name is normalized first.
    $targetEncoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    // Delegate the actual escaping to the native function.
    return htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
2464
2465
  /**
2466
   * Checks whether iconv is available on the server.
2467
   *
2468
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2469
   */
2470
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // The iconv settings are only touched when the extension really exists:
    // without it iconv_set_encoding() is undefined and calling it is a fatal error.
    if ($loaded === true && !Bootup::is_php('5.6')) {
      // INFO: "iconv_set_encoding" is deprecated since PHP 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2485 15
2486
  /**
2487
   * alias for "UTF8::decimal_to_chr()"
2488
   *
2489
   * @see UTF8::decimal_to_chr()
2490
   *
2491
   * @param int $int
2492
   *
2493
   * @return string
2494
   */
2495
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself is done by decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2499
2500
  /**
2501
   * Converts Integer to hexadecimal U+xxxx code point representation.
2502
   *
2503
   * INFO: opposite to UTF8::hex_to_int()
2504
   *
2505
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2506
   * @param string $pfix [optional]
2507
   *
2508
   * @return string <p>The code point, or empty string on failure.</p>
2509
   */
2510
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits yields the empty string.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits; longer values stay untouched.
    $hexDigits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hexDigits;
  }
2522
2523
  /**
2524
   * Checks whether intl-char is available on the server.
2525
   *
2526
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2527
   */
2528
  public static function intlChar_loaded()
  {
    // The version check comes first; the class lookup only runs when it passes.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2532
2533
  /**
2534
   * Checks whether intl is available on the server.
2535
   *
2536 1
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2537
   */
2538 1
  public static function intl_loaded()
  {
    // extension_loaded() already answers with a boolean.
    return extension_loaded('intl') === true;
  }
2542
2543
  /**
2544
   * alias for "UTF8::is_ascii()"
2545 1
   *
2546
   * @see UTF8::is_ascii()
2547 1
   *
2548
   * @param string $str
2549 1
   *
2550 1
   * @return boolean
2551
   *
2552 1
   * @deprecated
2553
   */
2554
  public static function isAscii($str)
  {
    // Deprecated camelCase alias; forwards to is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2558
2559
  /**
2560
   * alias for "UTF8::is_base64()"
2561
   *
2562
   * @see UTF8::is_base64()
2563 1
   *
2564
   * @param string $str
2565 1
   *
2566
   * @return bool
2567 1
   *
2568
   * @deprecated
2569
   */
2570
  public static function isBase64($str)
  {
    // Deprecated camelCase alias; forwards to is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2574 1
2575 1
  /**
2576 1
   * alias for "UTF8::is_binary()"
2577
   *
2578 1
   * @see UTF8::is_binary()
2579
   *
2580
   * @param string $str
2581
   *
2582
   * @return bool
2583
   *
2584
   * @deprecated
2585
   */
2586
  public static function isBinary($str)
  {
    // Deprecated camelCase alias; forwards to is_binary().
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
2590
2591
  /**
2592
   * alias for "UTF8::is_bom()"
2593 4
   *
2594
   * @see UTF8::is_bom()
2595 4
   *
2596
   * @param string $utf8_chr
2597 4
   *
2598
   * @return boolean
2599 4
   *
2600 4
   * @deprecated
2601 4
   */
2602 4
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias; forwards to is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2606 4
2607 4
  /**
2608 2
   * alias for "UTF8::is_html()"
2609 2
   *
2610 4
   * @see UTF8::is_html()
2611 4
   *
2612 4
   * @param string $str
2613
   *
2614 4
   * @return boolean
2615 4
   *
2616 4
   * @deprecated
2617 4
   */
2618 4
  public static function isHtml($str)
  {
    // Deprecated camelCase alias; forwards to is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2622 4
2623 3
  /**
2624 3
   * alias for "UTF8::is_json()"
2625 4
   *
2626 4
   * @see UTF8::is_json()
2627 4
   *
2628
   * @param string $str
2629 4
   *
2630 3
   * @return bool
2631 2
   *
2632
   * @deprecated
2633 3
   */
2634
  public static function isJson($str)
  {
    // Deprecated camelCase alias; forwards to is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
2638
2639 3
  /**
2640
   * alias for "UTF8::is_utf16()"
2641
   *
2642
   * @see UTF8::is_utf16()
2643
   *
2644
   * @param string $str
2645
   *
2646
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2647
   *
2648
   * @deprecated
2649
   */
2650
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias; forwards to is_utf16().
    $utf16Type = self::is_utf16($str);

    return $utf16Type;
  }
2654
2655 3
  /**
2656
   * alias for "UTF8::is_utf32()"
2657 3
   *
2658
   * @see UTF8::is_utf32()
2659 3
   *
2660 3
   * @param string $str
2661 3
   *
2662 3
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2663 3
   *
2664 3
   * @deprecated
2665 3
   */
2666 3
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias; forwards to is_utf32().
    $utf32Type = self::is_utf32($str);

    return $utf32Type;
  }
2670 3
2671 3
  /**
2672 3
   * alias for "UTF8::is_utf8()"
2673
   *
2674 3
   * @see UTF8::is_utf8()
2675 3
   *
2676 3
   * @param string $str
2677 3
   * @param bool   $strict
2678 3
   *
2679 3
   * @return bool
2680 3
   *
2681 3
   * @deprecated
2682 3
   */
2683 1
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias; forwards both arguments to is_utf8().
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2687 3
2688
  /**
2689 3
   * Checks if a string is 7 bit ASCII.
2690 1
   *
2691 1
   * @param string $str <p>The string to check.</p>
2692
   *
2693 1
   * @return bool <p>
2694
   *              <strong>true</strong> if it is ASCII<br />
2695
   *              <strong>false</strong> otherwise
2696
   *              </p>
2697 3
   */
2698
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // An empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Any byte in the range 0x80-0xFF is outside 7-bit ASCII.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
2708
2709
  /**
2710
   * Returns true if the string is base64 encoded, false otherwise.
2711
   *
2712 43
   * @param string $str <p>The input string.</p>
2713
   *
2714 43
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2715
   */
2716 43
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never treated as base64.
    if ($str === '') {
      return false;
    }

    // Round trip: strict decoding followed by encoding must reproduce the input exactly.
    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
2730 41
2731
  /**
2732
   * Check if the input is binary (heuristic; this looks like a hack).
2733
   *
2734
   * @param mixed $input
2735
   *
2736
   * @return bool
2737
   */
2738
  public static function is_binary($input)
  {
    // A string made up only of the characters "0" and "1" is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Otherwise a single NUL byte is enough. (A separate "more than 30 % NUL bytes"
    // test would be implied by this one, so only this check is needed.)
    $nulBytes = substr_count($input, "\x00");

    return $nulBytes > 0;
  }
2755 41
2756
  /**
2757 34
   * Check if the file is binary.
2758 34
   *
2759 34
   * @param string $file
2760 34
   *
2761 39
   * @return boolean
2762
   */
2763 21
  public static function is_binary_file($file)
  {
    $fp = false;
    $block = '';

    try {
      // fopen() reports failure by returning false, it does not throw on its own;
      // reading from or closing that false value must therefore be avoided.
      $fp = fopen($file, 'rb');

      if ($fp !== false) {
        // Only the first 512 bytes of the file are inspected.
        $read = fread($fp, 512);
        if ($read !== false) {
          $block = $read;
        }
      }
    } catch (\Exception $e) {
      // Kept for callers whose error handler turns warnings into exceptions:
      // the file is then judged on an empty block.
      $block = '';
    }

    // Release the handle no matter how the read ended.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2775
2776
  /**
2777
   * Checks if the given string is equal to any "Byte Order Mark".
2778
   *
2779
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2780
   *
2781
   * @param string $str <p>The input string.</p>
2782 3
   *
2783 3
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2784 3
   */
2785 3
  public static function is_bom($str)
  {
    // The keys of self::$bom are the candidate strings; the match must be strict.
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
2795
2796 5
  /**
2797
   * Check if the string contains any html-tags <lall>.
2798 41
   *
2799
   * @param string $str <p>The input string.</p>
2800
   *
2801 36
   * @return boolean
2802
   */
2803 33
  public static function is_html($str)
  {
    $str = (string)$str;

    // An empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, with optional attributes.
    $found = array();
    preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    return count($found) !== 0;
  }
2822 33
2823
  /**
2824 33
   * Try to check if "$str" is a JSON string.
2825 33
   *
2826 5
   * @param string $str <p>The input string.</p>
2827
   *
2828
   * @return bool
2829 33
   */
2830 33
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is not JSON.
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only a decoded object or array is accepted; other decoded values are rejected.
    if (!is_object($decoded) && !is_array($decoded)) {
      return false;
    }

    // The decoder must not have reported an error either.
    return json_last_error() === JSON_ERROR_NONE;
  }
2854
2855
  /**
2856
   * Check if the string is UTF-16.
2857
   *
2858
   * @param string $str <p>The input string.</p>
2859
   *
2860
   * @return int|false <p>
2861
   *                   <strong>false</strong> if it's not UTF-16,<br />
2862
   *                   <strong>1</strong> for UTF-16LE,<br />
2863
   *                   <strong>2</strong> for UTF-16BE.
2864
   *                   </p>
2865
   */
2866 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // The same scoring is run for both byte orders, little endian first.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $scores[$candidate] = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // The decoded text has to survive a round trip through the candidate encoding.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Score: number of keys of count_chars($roundTrip) that occur (strictly)
      // among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie, including "both zero", means no decision.
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    // 1 = UTF-16LE, 2 = UTF-16BE
    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2914
2915
  /**
2916
   * Check if the string is UTF-32.
2917
   *
2918
   * @param string $str
2919
   *
2920
   * @return int|false <p>
2921
   *                   <strong>false</strong> if it's not UTF-32,<br />
2922
   *                   <strong>1</strong> for UTF-32LE,<br />
2923
   *                   <strong>2</strong> for UTF-32BE.
2924
   *                   </p>
2925
   */
2926 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // The same scoring is run for both byte orders, little endian first.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $scores[$candidate] = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // The decoded text has to survive a round trip through the candidate encoding.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Score: number of keys of count_chars($roundTrip) that occur (strictly)
      // among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie, including "both zero", means no decision.
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    // 1 = UTF-32LE, 2 = UTF-32BE
    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
2974 23
2975 2
  /**
2976
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2977
   *
2978 23
   * @see    http://hsivonen.iki.fi/php-utf8/
2979
   *
2980 23
   * @param string $str    <p>The string to be checked.</p>
2981
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2982
   *
2983
   * @return bool
2984
   */
2985
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // An empty string is treated as valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: anything recognised as UTF-16 or UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): a previous shortcut used preg_match() with the /u modifier, but
    // only when self::pcre_utf8_support() was NOT true, which looks inverted.
    // The byte-wise check below needs no PCRE feature, so it is always used.

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the first octet of a sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of a 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of a 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of a 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } else {
          // A stray continuation octet, the first octet of a 5 or 6 octet
          // sequence (never legal, whatever follows), or 0xFE / 0xFF.
          return false;
        }

        continue;
      }

      // Inside a sequence only a continuation octet (10xxxxxx) is allowed.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the sequence: $mUcs4 now holds the complete code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            // From Unicode 3.2, surrogate characters are illegal.
            ($mUcs4 >= 0xD800 && $mUcs4 <= 0xDFFF) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A sequence that is still open at the end of the input is truncated, hence invalid.
    return $mState === 0;
  }
3119
3120
  /**
3121
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3122
   * Decodes a JSON string
3123
   *
3124
   * @link http://php.net/manual/en/function.json-decode.php
3125
   *
3126
   * @param string $json    <p>
3127 13
   *                        The <i>json</i> string being decoded.
3128
   *                        </p>
3129 13
   *                        <p>
3130 13
   *                        This function only works with UTF-8 encoded strings.
3131
   *                        </p>
3132 13
   *                        <p>PHP implements a superset of
3133 1
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3134 1
   *                        only supports these values when they are nested inside an array or an object.
3135 1
   *                        </p>
3136
   * @param bool   $assoc   [optional] <p>
3137 13
   *                        When <b>TRUE</b>, returned objects will be converted into
3138
   *                        associative arrays.
3139
   *                        </p>
3140
   * @param int    $depth   [optional] <p>
3141
   *                        User specified recursion depth.
3142
   *                        </p>
3143
   * @param int    $options [optional] <p>
3144
   *                        Bitmask of JSON decode options. Currently only
3145
   *                        <b>JSON_BIGINT_AS_STRING</b>
3146
   *                        is supported (default is to cast large integers as floats)
3147
   *                        </p>
3148
   *
3149
   * @return mixed the value encoded in <i>json</i> in appropriate
3150 18
   * PHP type. Values true, false and
3151
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3152 18
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3153 18
   * <i>json</i> cannot be decoded or if the encoded
3154
   * data is deeper than the recursion limit.
3155 18
   */
3156
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input always goes through self::filter() before it is decoded.
    $json = self::filter($json);

    // "$options" is only handed to the native function when Bootup reports PHP 5.4.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
3168
3169 18
  /**
   * Returns the JSON representation of a value (wrapper around the native "json_encode()").
   *
   * INFO: the value is passed through "UTF8::filter()" before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of the native JSON encode constants, e.g. <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded to the native function if "Bootup::is_php('5.5')" reports support.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // the native "$depth" argument is only passed on PHP >= 5.5
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
3217
3218
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string, an empty string for empty input.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "UTF8::substr()" can hand back false instead of a string, so both parts are
    // cast before they are used as strings
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
3229
3230 17
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped. If omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (control / other) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is a valid char-list (e.g. to strip leading zeros), so it must not be
    // mistaken for "no char-list given" just because it is falsy in PHP.
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3253 9
3254 9
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, an empty string if there is no character at all.</p>
   */
  public static function max($arg)
  {
    // an array of strings is treated as one long string
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native "max()" must not be called with an empty list
    // (warning + false before PHP 8, ValueError since PHP 8)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3269 9
3270
  /**
   * Determines the largest byte-width of any single UTF-8 character
   * found in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if "UTF8::chr_size_list()" yields nothing.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // nothing to measure
    if (count($sizes) === 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3287
3288 1
  /**
   * Checks whether mbstring is available on the server.
   *
   * INFO: if the extension is loaded, the mbstring internal encoding is set to "UTF-8" as a side effect.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3303 41
3304
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, an empty string if there is no character at all.</p>
   */
  public static function min($arg)
  {
    // an array of strings is treated as one long string
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native "min()" must not be called with an empty list
    // (warning + false before PHP 8, ValueError since PHP 8)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3319 1
3320 1
  /**
   * Deprecated camelCase alias, forwards to "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   *
   * @return string
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3335 1
3336 1
  /**
   * Normalize the encoding-"name" input.
   *
   * Known aliases are mapped to their canonical name; any other name is returned upper-cased.
   * Results are memoized per input value.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,
   *                      <strong>false</strong> if the given encoding is empty.</p>
   */
  public static function normalize_encoding($encoding)
  {
    static $normalizedCache = array();

    if (!$encoding) {
      return false;
    }

    // fast paths: already a canonical / directly usable name
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($normalizedCache[$encoding])) {
      return $normalizedCache[$encoding];
    }

    $upper = strtoupper($encoding);

    // lookup key: the upper-cased name without anything but letters, digits and whitespace
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // unknown names fall back to the upper-cased input
    $normalized = isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upper;

    $normalizedCache[$encoding] = $normalized;

    return $normalized;
  }
3392
3393
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "self::$utf8MSWord" table that occurs in the string
   * is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are built once per request from the lookup table
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
3419
3420
  /**
   * Normalize the whitespace.
   *
   * Every entry of the "self::$whitespaceTable" is replaced by a plain space and, unless
   * requested otherwise, the entries of the "self::$bidiUniCodeControlsTable" are removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // per-request caches for the search lists
    static $whitespaceCache = array();
    static $bidiControlsCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // one cached whitespace list per "keep non-breaking-space" variant
    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceCache[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceCache[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControlsCache === null) {
        $bidiControlsCache = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiControlsCache, '', $str);
    }

    return str_replace($whitespaceCache[$cacheKey], ' ', $str);
  }
3466
3467
  /**
   * Format a number with grouped thousands.
   *
   * INFO: the native "number_format()" supports multi-byte separators only since PHP 5.4,
   *       on older versions they are inserted by this method itself.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The workaround is needed if at least ONE separator is longer than one byte
    // and the running PHP version is OLDER than 5.4.
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // "strtr()" replaces both placeholders in one pass, so a separator that itself
      // contains "." or "," is not replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3504 52
3505
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8, other encodings are converted to UTF-8 first.</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 for an empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // fallback results are memoized per character
    static $cache = array();

    // empty input, but "0" is a valid character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // prefer "IntlChar", a falsy result falls through to the manual decoding below
    if (self::$support['intlChar'] === true) {
      $intlResult = \IntlChar::ord($chr);
      if ($intlResult) {
        return $intlResult;
      }
    }

    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    // decode by hand: look at up to four bytes, the lead byte decides the sequence length
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or a truncated sequence): the value of the first byte
      $codePoint = $lead;
    }

    $cache[$chr] = $codePoint;

    return $codePoint;
  }
3565
3566
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope,
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or if the $result is empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $parsed = \mb_parse_str($str, $result);

    // success needs both: the parser did not fail and something was extracted
    return $parsed !== false && !empty($result);
  }
3593
3594
  /**
   * Checks if the "/u" pattern modifier is available that enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier only matches if the modifier is supported,
    // errors are silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3604 36
3605 6
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: digit-only values are used as they are,
   * other hexadecimal values go through "UTF8::hex_to_int()", anything else through "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start and end the same way, in this order
    $bounds = array();
    foreach (array($var1, $var2) as $input) {
      if (ctype_digit((string)$input)) {
        $codePoint = (int)$input;
      } elseif (ctype_xdigit($input)) {
        $codePoint = (int)self::hex_to_int($input);
      } else {
        $codePoint = self::ord($input);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($bounds[0], $bounds[1]));
  }
3651
3652
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" sequences are turned into hexadecimal html entities,
    // so that the entity decoding below can handle them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    // decode until the string does not change anymore (or only once, without "$multi_decode")
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3702
3703
  /**
   * Deprecated camelCase alias, forwards to "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3718 6
3719 6
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$bom" (BOM byte-string => byte length) that the string starts with is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // "substr()" returns false before PHP 8 if nothing is left behind the BOM,
        // the cast keeps the documented string result
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3736
3737
  /**
   * Collapses directly repeated occurrences of a string in another string into a single one.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or strings) to search for in the base string,
   *                              any other type leaves the base string untouched.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $needles = array($what);
    } elseif (is_array($what)) {
      $needles = $what;
    } else {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      // one or more consecutive repetitions => exactly one occurrence
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
3760
3761
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded form ("%00" etc.) of these characters.</p>
   * @param string $replacement <p>The replacement for every removed character sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), so the patterns must be too
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left, a removal can uncover a new sequence
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3794 14
3795
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 chars (via mbstring).</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // mbstring expects the keyword "none" if invalid chars should simply be dropped
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // swap the substitute character only for this conversion and restore it afterwards
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    // finally replace the replacement-character itself (escaped and literal notation)
    $diamonds = array(
        "\xEF\xBF\xBD",
        '�',
    );

    return str_replace($diamonds, $replacementChar, $str);
  }
3845
3846
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped. If omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (control / other) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is a valid char-list (e.g. to strip trailing zeros), so it must not be
    // mistaken for "no char-list given" just because it is falsy in PHP.
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
3869 2
3870 2
  /**
   * Builds a regex fragment that matches any of the given characters.
   *
   * The result is a character class ("[...]"), or a non-capturing group of alternatives
   * if at least one chunk can not be placed inside of the character class. Results are memoized.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>The regex fragment, without delimiters and without quantifier.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // $parts[0] collects the character class, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen is moved to the front of the class
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // at most two bytes: escape for the usage in a pattern
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // longer, but still one single character: append as it is
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $pattern;

    return $pattern;
  }
3918
3919
  /**
3920
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
3921
   */
3922
  public static function showSupport()
  {
    // Make sure the support table has been populated before printing it.
    if (isset(self::$support['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // One entry per line, followed by an HTML line break.
    foreach (self::$support as $supportEntry) {
      echo $supportEntry . "\n<br>";
    }
  }
3932
3933
  /**
3934
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
3935
   *
3936
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
3937
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
3938
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
3939
   *
3940
   * @return string <p>The HTML numbered entity.</p>
3941
   */
3942
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    // Nothing to encode.
    if ($char === '') {
      return '';
    }

    // Optionally hand plain ASCII back untouched.
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // Only non-default encoding names go through normalization.
    $targetEncoding = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return '&#' . self::ord($char, $targetEncoding) . ';';
  }
3965 1
3966
  /**
3967
   * Convert a string to an array of Unicode characters.
3968
   *
3969
   * @param string  $str       <p>The string to split into array.</p>
3970
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
3971
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3972
   *
3973
   * @return string[] <p>An array containing chunks of the string.</p>
3974
   */
3975
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // With the "u" and "s" modifiers "." matches one character, newlines included.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // Fallback: walk the bytes and assemble sequences by their lead byte.
      // Bytes that do not form one of the sequences below are skipped.
      $byteCount = strlen($str);
      $pos = 0;

      while ($pos < $byteCount) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // 0xxxxxxx: single byte
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // 110xxxxx + one continuation byte
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos += 1;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // 1110xxxx + two continuation bytes
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // 11110xxx + three continuation bytes
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }

        ++$pos;
      }
    }

    // Group the single characters into chunks of $length characters.
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4052
4053
  /**
4054
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4055
   *
4056
   * @param string $str <p>The input string.</p>
4057
   *
4058
   * @return false|string <p>
4059
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4060
   *                      otherwise it will return false.
4061
   *                      </p>
4062
   */
4063
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // Run each detector once and reuse its result: 1 is mapped to the
      // little-endian name, 2 to the big-endian name.
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the input from that encoding
    // to itself leaves the content unchanged (compared via md5).

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4145
4146
  /**
4147 1
   * Check if the string ends with the given substring.
4148
   *
4149 1
   * @param string $haystack <p>The string to search in.</p>
4150
   * @param string $needle   <p>The substring to search for.</p>
4151
   *
4152
   * @return bool
4153
   */
4154 View Code Duplication
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Take as many characters from the end as the needle is long and compare.
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4169 12
4170
  /**
4171
   * Check if the string ends with the given substring, case insensitive.
4172
   *
4173
   * @param string $haystack <p>The string to search in.</p>
4174
   * @param string $needle   <p>The substring to search for.</p>
4175
   *
4176
   * @return bool
4177
   */
4178 View Code Duplication
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Compare the needle-sized tail of the haystack, ignoring case.
    $tail = self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4193 8
4194 1
  /**
4195
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4196
   *
4197 7
   * @link  http://php.net/manual/en/function.str-ireplace.php
4198
   *
4199 7
   * @param mixed $search  <p>
4200
   *                       Every replacement with search array is
4201
   *                       performed on the result of previous replacement.
4202 1
   *                       </p>
4203
   * @param mixed $replace <p>
4204
   *                       </p>
4205
   * @param mixed $subject <p>
4206
   *                       If subject is an array, then the search and
4207
   *                       replace is performed with every entry of
4208
   *                       subject, and the return value is an array as
4209
   *                       well.
4210
   *                       </p>
4211
   * @param int   $count   [optional] <p>
4212
   *                       The number of matched and replaced needles will
4213
   *                       be returned in count which is passed by
4214
   *                       reference.
4215
   *                       </p>
4216
   *
4217
   * @return mixed <p>A string or an array of replacements.</p>
4218 1
   */
4219
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every search term into a case-insensitive, UTF-8 aware pattern.
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // An empty search term must not match anything: this pattern never matches.
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // preg_replace() interprets "$n" and "\n" in the replacement as
    // back-references. The replacement is meant literally here, so "\" and
    // "$" are escaped first.
    $escape = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escape);
      }
    } else {
      $replacement = strtr((string)$replace, $escape);
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4237 2
4238 2
  /**
4239 2
   * Check if the string starts with the given substring, case insensitive.
4240
   *
4241 2
   * @param string $haystack <p>The string to search in.</p>
4242 2
   * @param string $needle   <p>The substring to search for.</p>
4243
   *
4244
   * @return bool
4245
   */
4246 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // The needle has to be found at the very first position (case-insensitive search).
    return self::stripos($haystack, $needle) === 0;
  }
4261
4262
  /**
4263
   * Limit the number of characters in a string, cutting at the last space within the limit where possible.
4264
   *
4265
   * @param string $str
4266
   * @param int    $length
4267
   * @param string $strAddOn
4268
   *
4269
   * @return string
4270
   */
4271
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // Short enough already: return unchanged, without the add-on.
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The last allowed character is a space: cut right before it.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // Otherwise cut at the limit and drop the last space-separated piece.
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // Nothing left (no space inside the limit): hard cut instead.
    if ($withoutLastWord === '') {
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4302
4303
  /**
4304
   * Pad a UTF-8 string to given length with another string.
4305
   *
4306
   * @param string $str        <p>The input string.</p>
4307
   * @param int    $pad_length <p>The length of return string.</p>
4308
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4309
   * @param int    $pad_type   [optional] <p>
4310
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4311
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4312
   *                           </p>
4313
   *
4314 8
   * @return string <strong>Returns the padded string</strong>
4315
   */
4316 8
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    // Only pad for a positive integer target length that is not shorter
    // than the input; anything else returns the input unchanged.
    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // An empty pad string cannot fill anything and would cause a division
    // by zero below.
    if (!$ps_length) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the rounded-down half, the right side the
        // rounded-up half. The cast has to wrap the whole division so that
        // an integer length is passed on.
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = self::substr($pre, 0, (int)($diff / 2));
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4357 8
4358 2
  /**
4359
   * Repeat a string.
4360
   *
4361 6
   * @param string $str        <p>
4362
   *                           The string to be repeated.
4363
   *                           </p>
4364
   * @param int    $multiplier <p>
4365 6
   *                           Number of times the input string should be
4366
   *                           repeated.
4367
   *                           </p>
4368
   *                           <p>
4369
   *                           multiplier has to be greater than or equal to 0.
4370
   *                           If the multiplier is set to 0, the function
4371
   *                           will return an empty string.
4372 6
   *                           </p>
4373
   *
4374
   * @return string <p>The repeated string.</p>
4375
   */
4376
  public static function str_repeat($str, $multiplier)
  {
    // Run the input through filter() first, then repeat with the native function.
    return str_repeat(self::filter($str), $multiplier);
  }
4382
4383
  /**
4384
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
4385
   *
4386
   * Replace all occurrences of the search string with the replacement string
4387 62
   *
4388
   * @link http://php.net/manual/en/function.str-replace.php
4389 62
   *
4390
   * @param mixed $search  <p>
4391 62
   *                       The value being searched for, otherwise known as the needle.
4392 4
   *                       An array may be used to designate multiple needles.
4393
   *                       </p>
4394
   * @param mixed $replace <p>
4395
   *                       The replacement value that replaces found search
4396
   *                       values. An array may be used to designate multiple replacements.
4397 61
   *                       </p>
4398 2
   * @param mixed $subject <p>
4399 61
   *                       The string or array being searched and replaced on,
4400 60
   *                       otherwise known as the haystack.
4401 60
   *                       </p>
4402 2
   *                       <p>
4403
   *                       If subject is an array, then the search and
4404
   *                       replace is performed with every entry of
4405
   *                       subject, and the return value is an array as
4406 61
   *                       well.
4407 61
   *                       </p>
4408 1
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4409
   *
4410
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4411 61
   */
4412 2
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // Plain pass-through to the native function; $count is filled by reference.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4416
4417
  /**
4418
   * Replace the first "$search"-term with the "$replace"-term.
4419
   *
4420
   * @param string $search
4421
   * @param string $replace
4422
   * @param string $subject
4423
   *
4424
   * @return string
4425
   */
4426
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstPos = self::strpos($subject, $search);

    // Search term not found: nothing to replace.
    if ($firstPos === false) {
      return $subject;
    }

    // Replace exactly the characters covered by the first occurrence.
    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
4436
4437
  /**
4438
   * Shuffles all the characters in the string.
4439
   *
4440
   * @param string $str <p>The input string</p>
4441
   *
4442
   * @return string <p>The shuffled string.</p>
4443
   */
4444
  public static function str_shuffle($str)
  {
    // Break the string into characters, shuffle them in place, glue them back together.
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4452
4453
  /**
4454
   * Sort all characters according to code points.
4455
   *
4456
   * @param string $str    <p>A UTF-8 string.</p>
4457
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4458
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4459
   *
4460
   * @return string <p>String of sorted characters.</p>
4461
   */
4462
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    // Flipping twice removes duplicate values.
    if ($unique) {
      $codePoints = array_flip(array_flip($codePoints));
    }

    // Sort by value; the resulting iteration order is what string() consumes.
    $desc ? arsort($codePoints) : asort($codePoints);

    return self::string($codePoints);
  }
4478
4479
  /**
4480
   * Split a string into an array.
4481
   *
4482
   * @param string $str
4483
   * @param int    $len
4484
   *
4485 2
   * @return array
4486
   */
4487 2
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // Invalid chunk length: hand over to the native function.
    if ($len < 1) {
      return str_split($str, $len);
    }

    // Split into grapheme clusters.
    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Glue every $len consecutive clusters into one chunk.
    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // First cluster of a new chunk.
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4523
4524
  /**
4525
   * Check if the string starts with the given substring.
4526
   *
4527
   * @param string $haystack <p>The string to search in.</p>
4528
   * @param string $needle   <p>The substring to search for.</p>
4529
   *
4530
   * @return bool
4531
   */
4532 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // The needle has to be found at the very first position.
    return self::strpos($haystack, $needle) === 0;
  }
4547
4548
  /**
4549 14
   * Get a binary representation of a specific string.
4550
   *
4551
   * @param string $str <p>The input string.</p>
4552
   *
4553 14
   * @return string
4554
   */
4555
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Convert byte by byte instead of running the whole hex dump through
    // base_convert(): that function loses precision on large numbers and
    // therefore returns wrong digits for longer inputs.
    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; ++$i) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // Keep the previous output format: no leading zeros, and "0" when there
    // is no set bit at all (empty string or only NUL bytes).
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4563
4564
  /**
4565
   * Convert a string into an array of words.
4566 14
   *
4567 2
   * @param string $str
4568 14
   * @param string $charlist
4569 14
   *
4570 14
   * @return array
4571 1
   */
4572
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    // An empty input yields a single empty element.
    if ($str === '') {
      return array('');
    }

    // Letters (\pL) plus the extra characters from $charlist make up a word.
    $charlist = self::rxClass($charlist, '\pL');

    // The capturing group makes preg_split() return the words themselves
    // in between the pieces of text that separate them.
    $parts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    return $parts;
  }
4584
4585
  /**
4586
   * alias for "UTF8::to_ascii()"
4587
   *
4588
   * @see UTF8::to_ascii()
4589
   *
4590
   * @param string $str
4591
   * @param string $unknown
4592
   * @param bool   $strict
4593
   *
4594
   * @return string
4595
   */
4596
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // Pure alias: all arguments are forwarded unchanged.
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4600
4601
  /**
4602
   * Counts number of words in the UTF-8 string.
4603
   *
4604
   * @param string $str      <p>The input string.</p>
4605
   * @param int    $format   [optional] <p>
4606
   *                         <strong>0</strong> => return a number of words (default)<br />
4607
   *                         <strong>1</strong> => return an array of words<br />
4608
   *                         <strong>2</strong> => return an array of words with word-offset as key
4609
   *                         </p>
4610
   * @param string $charlist [optional] <p>Additional characters that belong to words and do not start a new word.</p>
4611
   *
4612
   * @return array|int <p>The number of words in the string</p>
4613
   */
4614
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // The words are read from the odd indexes of the split result.
    $strParts = self::str_to_words($str, $charlist);
    $len = count($strParts);

    // Format 1: plain list of words.
    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    // Format 2: words keyed by their character offset in the input.
    if ($format === 2) {
      $words = array();
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $strParts[$i];
        // Skip over the word itself and the text that follows it.
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $words;
    }

    // Any other format: number of words.
    return ($len - 1) / 2;
  }
4644
4645 4
  /**
4646
   * Case-insensitive string comparison.
4647 4
   *
4648 2
   * INFO: Case-insensitive version of UTF8::strcmp()
4649
   *
4650
   * @param string $str1
4651 3
   * @param string $str2
4652
   *
4653
   * @return int <p>
4654
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4655
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4656
   *             <strong>0</strong> if they are equal.
4657
   *             </p>
4658
   */
4659
  public static function strcasecmp($str1, $str2)
  {
    // Case-fold both sides, then compare them case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4663
4664
  /**
4665
   * alias for "UTF8::strstr()"
4666
   *
4667
   * @see UTF8::strstr()
4668
   *
4669
   * @param string  $haystack
4670
   * @param string  $needle
4671
   * @param bool    $before_needle
4672
   * @param string  $encoding
4673
   * @param boolean $cleanUtf8
4674
   *
4675
   * @return string|false
4676
   */
4677 1
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged.
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4681 1
4682
  /**
4683 1
   * Case-sensitive string comparison.
4684
   *
4685
   * @param string $str1
4686
   * @param string $str2
4687
   *
4688
   * @return int  <p>
4689
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
4690 1
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
4691
   *              <strong>0</strong> if they are equal.
4692
   *              </p>
4693
   */
4694
  public static function strcmp($str1, $str2)
  {
    // Identical once both are converted to strings: no normalization needed.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the NFD-normalized forms byte-wise.
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4702
4703
  /**
4704
   * Find length of initial segment not matching mask.
4705
   *
4706
   * @param string $str
4707 1
   * @param string $charList
4708
   * @param int    $offset
4709 1
   * @param int    $length
4710
   *
4711
   * @return int|null
4712
   */
4713
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;

    // Without a mask there is nothing to measure against.
    if ($charList === '') {
      return null;
    }

    // Restrict the subject to the requested window, if one was given.
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first character from the mask.
    $found = preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches);
    if ($found) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // No character from the mask occurs: the whole string is the segment.
    return self::strlen($str);
  }
4735 2
4736
  /**
4737 11
   * alias for "UTF8::stristr()"
4738
   *
4739 11
   * @see UTF8::stristr()
4740 2
   *
4741
   * @param string  $haystack
4742
   * @param string  $needle
4743
   * @param bool    $before_needle
4744 10
   * @param string  $encoding
4745 10
   * @param boolean $cleanUtf8
4746
   *
4747
   * @return string|false
4748
   */
4749 10
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged.
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4753
4754 3
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string: the results of UTF8::chr() for each element, concatenated in order.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    // convert every code point on its own and append it in input order
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4776 10
4777 10
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The candidates are the keys of the internal BOM table (self::$bom).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // only the keys (the BOM byte sequences) matter here, not the stored values
    $bomCandidates = array_keys(self::$bom);

    foreach ($bomCandidates as $bomString) {
      $position = strpos($str, $bomString);
      if ($position === 0) {
        return true;
      }
    }

    return false;
  }
4794
4795
  /**
   * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Run the string through UTF8::clean() before stripping.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    // the cleanup has to happen before the tags are removed
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4823 10
4824 10
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string   $haystack  <p>
   *                            The string from which to get the position of the first occurrence
   *                            of needle
   *                            </p>
   * @param string   $needle    <p>
   *                            The string to find in haystack
   *                            </p>
   * @param int|null $offset    [optional] <p>
   *                            The position in haystack to start searching;
   *                            <strong>null</strong> is treated as 0.
   *                            </p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (also if haystack or needle is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // "grapheme_stripos()" and "mb_stripos()" declare an integer offset; handing them the
    // default null is deprecated since PHP 8.1, so normalize it up front (null becomes 0)
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding == 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4891 1
4892
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle (excluding the needle) is returned instead.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found
   *                      (also if haystack or needle is empty).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the positions
      // reported by the "mb_" / "iconv_" functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only mbstring is able to honor a non UTF-8 encoding
    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure PCRE fallback: lazily capture everything in front of the first case-insensitive match
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/usi';
    preg_match($pattern, $haystack, $matches);

    if (!isset($matches[1])) {
      return false;
    }

    $prefix = $matches[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
4958
4959
  /**
   * Get the string length, not the byte-length!
   *
   * The first available backend is used, in this order: iconv (only for a non UTF-8 encoding when
   * mbstring is missing), mbstring, intl (grapheme), iconv, PCRE and finally "mb_strlen()" via polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $wantsUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $wantsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding);

    // single-byte encodings: the byte count is the character count
    if (in_array($encoding, array('ASCII', 'CP850'))) {
      return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      if (self::$support['iconv'] === false) {
        trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      // without mbstring, iconv is the only backend that understands other encodings
      if (self::$support['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$support['intl'] === true) {
      // the cleaned string is also what the following fallbacks will see
      $str = self::clean($str);
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    if (self::$support['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // fallback via vanilla php: count the characters matched by PCRE in UTF-8 mode
    preg_match_all('/./us', $str, $chars);
    $length = count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
5055
5056
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared via UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5072 1
5073 1
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical inputs are equal, no folding required
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5091
5092
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared via UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5109
5110
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to their first $len characters via UTF8::substr() and then compared
   * via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() may return false instead of a string; cast it so that
    // UTF8::strcmp() always receives the strings it expects
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5130
5131
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      String starting from the character found,<br />
   *                      or false if it is not found (also if haystack or char_list is empty).
   *                      </p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $matches)) {
      return false;
    }

    // byte position and byte-wise cut are used together, so they stay consistent
    $bytePosition = strpos($haystack, $matches[0]);

    return substr($haystack, $bytePosition);
  }
5156
5157
  /**
   * Find position of first occurrence of string in a string.
   *
   * The first available backend is used, in this order: iconv (only for a non UTF-8 encoding when
   * mbstring is missing), mbstring, intl (grapheme), iconv and finally a vanilla php fallback.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack  <p>The string being checked.</p>
   * @param string  $needle    <p>
   *                           The string to find in haystack. The value is cast to string, so an integer
   *                           is searched for as its decimal representation.
   *                           </p>
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn only if NO backend that understands other encodings is available
    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // translate a negative offset (counted from the end) into an absolute start position,
      // so that the returned position refers to the whole haystack and not only to its tail
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    $tail = (string)self::substr($haystack, $offset);

    $bytePosition = strpos($tail, $needle);
    if ($bytePosition === false) {
      return false;
    }

    // convert the byte position inside the tail into a character position inside the haystack
    return $offset + self::strlen(substr($tail, 0, $bytePosition));
  }
5278
5279
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use; anything other than "UTF-8" is passed
   *                              through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the positions
      // reported by the "mb_" / "iconv_" functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $result = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5318
5319
  /**
   * Reverses characters order in the string.
   *
   * The string is split via UTF8::split() and the resulting parts are joined in reverse order.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5336
5337
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end.
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name to use; anything other than "UTF-8" is passed
   *                               through UTF8::normalize_encoding() first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the positions
      // reported by the "mb_" / "iconv_" functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $result = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5375
5376
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * The search is delegated to "\mb_strripos" when mbstring is available, then to
   * "\grapheme_strripos" when intl is available, and finally to a lower-cased
   * comparison via UTF8::strrpos().
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool       $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a real integer needle is converted via UTF8::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strripos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    //
    // Both operands are lower-cased so that the comparison is really case-insensitive.
    // The former accent-stripping fold (UTF8::strtonatfold) kept the case untouched and
    // therefore missed "ABC" vs. "b", while wrongly matching "café" vs. "cafe".
    // NOTE(review): the returned position refers to the lower-cased copy of the haystack.

    return self::strrpos(
        self::strtolower($haystack, $encoding),
        self::strtolower($needle, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
5453
5454
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Tries "\mb_strrpos" first, then "\grapheme_strrpos", and finally a byte-wise
   * search whose result is converted back into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool       $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a real, non-negative integer is treated as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // nothing to search in / for
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a boolean $encoding (old signature) is mapped to the default charset
    $encoding = ($encoding === 'UTF-8' || is_bool($encoding))
        ? 'UTF-8'
        : self::normalize_encoding($encoding);

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$support['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      $position = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    if (self::$support['intl'] === true) {
      $position = \grapheme_strrpos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // fallback via vanilla php: cut the haystack according to the offset first ...

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    // ... then search byte-wise ...
    $bytePosition = strrpos($haystack, $needle);

    // ... and count the characters in front of the match.
    return $bytePosition === false
        ? false
        : $offset + self::strlen(substr($haystack, 0, $bytePosition));
  }
5548
5549
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start of the inspected part, passed on to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the inspected part, passed on to UTF8::substr().</p>
   *
   * @return int <p>Number of leading characters matched by the mask, 0 if there is none.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $offset = (int)$offset;
    $length = (int)$length;

    // only cut the string if a non-default window was requested
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;

    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the longest run of mask characters at the very beginning
    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5577 2
5578
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Tries "\mb_strstr" first, then "\grapheme_strstr", and finally a regular expression.
   *
   * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, strstr() returns the part of the
   *                              haystack before the first occurrence of the needle (excluding the needle).
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to search in / for
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters are removed from both operands before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$support['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      $portion = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($portion !== false) {
        return $portion;
      }
    }

    if (self::$support['intl'] === true) {
      $portion = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($portion !== false) {
        return $portion;
      }
    }

    // fallback: capture (non-greedy) everything in front of the first needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle
        ? $prefix
        : self::substr($haystack, self::strlen($prefix));
  }
5650
5651 19
  /**
   * Unicode transformation for case-less matching.
   *
   * The common case folding map is always applied, the full map only on request;
   * the result is lower-cased via UTF8::strtolower() at the end.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $full      [optional] <p>
   *                          <b>true</b>, replace full case folding chars (default)<br />
   *                          <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
   *                          </p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // the lookup tables are built / loaded only once per request
    static $foldSearch = null;
    static $foldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($foldSearch === null) {
      $foldSearch = array_keys(self::$commonCaseFold);
      $foldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($foldSearch, $foldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    // the optional clean-up runs after the replacements
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5702
5703
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str       <p>The string being lowercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // empty input: nothing to convert
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the string is handed to the "mb_"-function
      $str = self::clean($str);
    }

    // only non-default charsets need to be normalized
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtolower($str, $charset);
  }
5735
5736
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and every run of non-spacing marks (\p{Mn}) is removed;
   * the letter case is left untouched.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5748
5749
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str       <p>The string being uppercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // empty input: nothing to convert
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the string is handed to the "mb_"-function
      $str = self::clean($str);
    }

    // only non-default charsets need to be normalized
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtoupper($str, $charset);
  }
5780
5781 7
  /**
   * Translate characters or replace sub-strings.
   *
   * With $to given, $from and $to are turned into character lists (strings are split via
   * UTF8::str_split(), arrays are used as they are), truncated to the same length and
   * combined into a replacement map. Without $to, $from is passed on to strtr() unchanged.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // UTF8::str_split() is meant for strings only, so arrays are used as given
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs the same number of elements on both sides
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    // NOTE(review): when $to is omitted, $from is expected to be a replacement map (array)
    return strtr($str, $from);
  }
5814 1
5815 1
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default charsets need to be normalized
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // the measuring itself is done by the "mb_"-function (native or via polyfill)
    $width = \mb_strwidth($str, $charset);

    return $width;
  }
5839
5840
  /**
   * Get part of a string.
   *
   * Tries "\mb_substr" first, then "\iconv_substr" (non-negative length only), then
   * "\grapheme_substr", and finally slices the list of characters from UTF8::split().
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $start     <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      Returns a sub-string specified by the start and length parameters.<br />
   *                      An empty input results in an empty string, a start beyond the end of the string in false.
   *                      </p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for a start-check or as default length
    $charCount = 0;
    if ($start || $length === null) {
      $charCount = (int)self::strlen($str);
    }

    // a start position behind the last character can not be served
    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: a boolean $encoding (old signature) is mapped to the default charset
    $encoding = ($encoding === 'UTF-8' || is_bool($encoding))
        ? 'UTF-8'
        : self::normalize_encoding($encoding);

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$support['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php:
    // split into characters, cut out the relevant part and glue it together again
    $chars = self::split($str);

    return implode('', array_slice($chars, $start, $length));
  }
5929
5930
  /**
5931
   * Binary safe comparison of two strings from an offset, up to length characters.
5932
   *
5933
   * @param string  $main_str           <p>The main string being compared.</p>
5934
   * @param string  $str                <p>The secondary string being compared.</p>
5935
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5936
   *                                    the end of the string.</p>
5937
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5938
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5939
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5940
   *                                    insensitive.</p>
5941
   *
5942
   * @return int
5943
   */
5944
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
5945
  {
5946
    $main_str = self::substr($main_str, $offset, $length);
5947
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5946 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5948
5949
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5946 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5947 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5946 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5947 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5950
  }
5951
5952
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack  <p>The string to search in.</p>
   * @param string   $needle    <p>The substring to search for.</p>
   * @param int      $offset    [optional] <p>The offset where to start counting.</p>
   * @param int|null $length    [optional] <p>
   *                            The maximum length after the specified offset to search for the
   *                            substring. If it is omitted (null), the search runs from the
   *                            offset to the end of the haystack.
   *                            </p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty
   *                   (or, before PHP 7.1, if offset plus length is not positive).</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Compare $length strictly against null: an explicit length of 0 is a real
    // request, while null means "no limit".
    if ($offset || $length !== null) {
      $lengthWasGiven = ($length !== null);

      $offset = (int)$offset;
      $length = (int)$length;

      // NOTE(review): presumably mirrors the behaviour of the native substr_count()
      // before PHP 7.1, which rejected such ranges - confirm.
      if (
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      if ($lengthWasGiven === false) {
        // No length requested: keep everything from $offset to the end instead of
        // cutting the haystack down to zero characters. This is the same "unlimited"
        // value that substr_compare() uses as its default length.
        $length = 2147483647;
      }

      // self::substr() may return false; treat that as "nothing left to search in".
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // Fallback without mbstring: count the matches of the literal needle.
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6026
6027
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if the prefix does not
   *                match.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the haystack untouched
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $prefixLength = self::strlen($needle);

    return self::substr($haystack, $prefixLength);
  }
6055
6056
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if the suffix does not
   *                match.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the haystack untouched
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6084 1
6085 1
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if the prefix does not
   *                match.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the haystack untouched
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $prefixLength = self::strlen($needle);

    return self::substr($haystack, $prefixLength);
  }
6113
6114 5
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, it defaults to
   *                                          the length of that string; i.e. the replacing ends at the end of string.
   *                                          If length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // An empty input array has nothing to replace. Without this guard the padded
      // helper arrays below would still contain one element and array_map() would
      // produce a one-element result for an empty input.
      if ($num === 0) {
        return array();
      }

      // $replacement: one replacement per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one offset per input string, non-integer entries fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one length per input string
      if (!isset($length)) {
        // NOTE(review): for array input a missing length becomes 0 (insert), while for a
        // single string it means "up to the end of the string" - confirm this is intended.
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call: every element is handled by the single-string code path below.
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // Single string with an array of replacements: only the first replacement is used.
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // Split both strings into characters (not bytes), so that offsets and lengths
    // count multi-byte characters as one unit.
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6211 8
6212 8
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if the suffix does not
   *                match.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the haystack untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6239 6
6240 6
  /**
   * Returns a copy of the string in which the case of every character is swapped.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the string is processed character by character
      $str = self::clean($str);
    }

    // A character that is unchanged by upper-casing is lower-cased,
    // every other character is replaced by its upper-case form.
    $swapSingleChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upperChar = UTF8::strtoupper($char, $encoding);

      return $char === $upperChar ? UTF8::strtolower($char, $encoding) : $upperChar;
    };

    // only non-whitespace characters are passed to the callback
    return preg_replace_callback('/[\S]/u', $swapSingleChar, $str);
  }
6283
6284
  /**
   * Deprecated alias for "UTF8::to_ascii()"; all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed to "UTF8::to_ascii()" as the second argument.</p>
   * @param bool   $strict    <p>Passed to "UTF8::to_ascii()" as the third argument.</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6301
6302
  /**
   * Deprecated alias for "UTF8::to_iso8859()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6317
6318
  /**
   * Deprecated alias for "UTF8::to_latin1()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6333
6334
  /**
   * Deprecated alias for "UTF8::to_utf8()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6349
6350
  /**
   * Convert a string into ASCII.
   *
   * <p>Pure ASCII input is returned right after cleaning. Every other character is decoded to its
   * code point and looked up in a per-"bank" (code point >> 8) replacement table, which is loaded
   * on demand via "self::getData()" and cached for the rest of the request.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If $strict is true, but Intl is not available or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // replacement tables, indexed by bank and cached across calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset for every character: otherwise the code point of the previous
      // character would be reused whenever the current lead byte is not decodable.
      $ord = null;

      // Decode the code point; the lead byte determines the sequence length.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        // 3 bytes
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        // 4 bytes
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // 5 bytes
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        // 6 bytes
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // No valid lead byte (128-191, 254 or 255): the character cannot be decoded.
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // The upper bits select the replacement table, the lower 8 bits the entry in it.
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the missing table as empty
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return implode('', $chars);
  }
6505
6506
  /**
   * Convert a string, or every element of an array, into "ISO-8859"-encoding (Latin-1).
   *
   * @param string|string[] $str <p>A string or an (optionally nested) array of strings.</p>
   *
   * @return string|string[] <p>The converted string; for array input an array with the same keys.</p>
   */
  public static function to_iso8859($str)
  {
    // arrays are converted element by element, keys are preserved
    if (is_array($str)) {
      return array_map(array(__CLASS__, 'to_iso8859'), $str);
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
6535
6536
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6549
6550
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences (e.g. "\u00e4") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // look ahead up to three bytes, a missing byte counts as "\x00"
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

        // is the following byte a UTF-8 continuation byte (0x80 - 0xbf)?
        $c2IsContinuation = $c2 >= "\x80" && $c2 <= "\xbf";
        $c3IsContinuation = $c3 >= "\x80" && $c3 <= "\xbf";
        $c4IsContinuation = $c4 >= "\x80" && $c4 <= "\xbf";

        if ($c1 <= "\xdf" && $c2IsContinuation) {
          // looks like 2 bytes UTF8 - yeah, almost sure it's UTF8 already
          $buf .= $c1 . $c2;
          $i++;
          continue;
        }

        if ($c1 >= "\xe0" && $c1 <= "\xef" && $c2IsContinuation && $c3IsContinuation) {
          // looks like 3 bytes UTF8 - yeah, almost sure it's UTF8 already
          $buf .= $c1 . $c2 . $c3;
          $i += 2;
          continue;
        }

        if ($c1 >= "\xf0" && $c1 <= "\xf7" && $c2IsContinuation && $c3IsContinuation && $c4IsContinuation) {
          // looks like 4 bytes UTF8 - yeah, almost sure it's UTF8 already
          $buf .= $c1 . $c2 . $c3 . $c4;
          $i += 3;
          continue;
        }

        // Not valid UTF8: encode the single byte as a 2-byte sequence.
        // The shift replaces the former division by 64, so chr() always gets an integer.
        $cc1 = (chr(ord($c1) >> 6) | "\xc0");
        $cc2 = (($c1 & "\x3f") | "\x80");
        $buf .= $cc1 . $cc2;

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $cc1 = (chr($ordC1 >> 6) | "\xc0");
          $cc2 = (($c1 & "\x3f") | "\x80");
          $buf .= $cc1 . $cc2;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6667
6668
  /**
   * Strip whitespace (or other characters) from both ends of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) strings / chars,
   * but checking for ASCII costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>
   *                      Characters to be stripped. When omitted (or "falsy"), everything matching the
   *                      Unicode classes \pZ (separators) and \pC ("other") is stripped instead.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $useDefaultClasses = ($chars === INF || !$chars);

    // an explicit char-list was given: delegate to the left / right variants
    if (!$useDefaultClasses) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6696
6697
  /**
   * Makes the first character of a string uppercase.
   *
   * The first character is cut off via "UTF8::substr()", upper-cased via "UTF8::strtoupper()"
   * and then glued back in front of the rest of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    // everything after the first character stays untouched
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    return $firstCharUpper . $remainder;
  }
6710
6711
  /**
   * Alias for "UTF8::ucfirst()": upper-cases the first character of a single word.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The word to convert.</p>
   * @param string  $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $converted = self::ucfirst($word, $encoding, $cleanUtf8);

    return $converted;
  }
6726
6727
  /**
   * Uppercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every chunk that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()", and the chunks are joined again
   * without any glue.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The converted string, or an empty string for empty input.</p>
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Only a really empty string means "nothing to do": a plain truthiness check
    // would also throw away the valid input "0".
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    // NOTE(review): the chunks are re-joined with an empty glue below, so "str_to_words()"
    // presumably returns the separators between the words as well - confirm.
    foreach (self::str_to_words($str, $charlist) as $word) {

      // Skip empty chunks only; "0" is falsy in PHP but is real content that must survive.
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6776
6777
  /**
   * Multi-pass url-decoding that also decodes html-entities and repairs simple UTF-8 problems.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Every pass runs: "UTF8::to_utf8()" -> "UTF8::html_entity_decode()" -> native "urldecode()"
   * -> "UTF8::fix_simple_utf8()".
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until a pass changes nothing anymore).</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hex html-entities, so that the entity-decoding below can handle them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced when "Bootup::is_php('5.4')" reports support for it
    $entityFlags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $entityFlags = ENT_QUOTES | ENT_HTML5;
    }

    for (; ;) {
      $beforePass = $str;

      $entityDecoded = self::html_entity_decode(self::to_utf8($str), $entityFlags);
      $str = self::fix_simple_utf8(urldecode($entityDecoded));

      // stop after the first pass in single-mode, or as soon as a pass changed nothing
      if ($multi_decode !== true || $beforePass === $str) {
        break;
      }
    }

    return (string)$str;
  }
6827
6828
  /**
6829
   * Returns an array that maps url-encoded Windows-1252 bytes ("%20" ... "%FF") to their UTF-8 characters.
6830
   *
6831
   * @deprecated use the "UTF8::urldecode()" function to decode a string
6832
   *
6833
   * @return array
6834
   */
6835
  public static function urldecode_fix_win1252_chars()
6836
  {
6837
    static $array = array(
6838
        '%20' => ' ',
6839
        '%21' => '!',
6840
        '%22' => '"',
6841
        '%23' => '#',
6842
        '%24' => '$',
6843
        '%25' => '%',
6844
        '%26' => '&',
6845
        '%27' => "'",
6846
        '%28' => '(',
6847
        '%29' => ')',
6848
        '%2A' => '*',
6849
        '%2B' => '+',
6850
        '%2C' => ',',
6851
        '%2D' => '-',
6852
        '%2E' => '.',
6853
        '%2F' => '/',
6854
        '%30' => '0',
6855
        '%31' => '1',
6856
        '%32' => '2',
6857
        '%33' => '3',
6858
        '%34' => '4',
6859
        '%35' => '5',
6860
        '%36' => '6',
6861
        '%37' => '7',
6862
        '%38' => '8',
6863
        '%39' => '9',
6864
        '%3A' => ':',
6865
        '%3B' => ';',
6866
        '%3C' => '<',
6867
        '%3D' => '=',
6868
        '%3E' => '>',
6869
        '%3F' => '?',
6870
        '%40' => '@',
6871
        '%41' => 'A',
6872
        '%42' => 'B',
6873
        '%43' => 'C',
6874
        '%44' => 'D',
6875
        '%45' => 'E',
6876
        '%46' => 'F',
6877
        '%47' => 'G',
6878
        '%48' => 'H',
6879
        '%49' => 'I',
6880
        '%4A' => 'J',
6881
        '%4B' => 'K',
6882
        '%4C' => 'L',
6883
        '%4D' => 'M',
6884
        '%4E' => 'N',
6885
        '%4F' => 'O',
6886
        '%50' => 'P',
6887
        '%51' => 'Q',
6888
        '%52' => 'R',
6889
        '%53' => 'S',
6890
        '%54' => 'T',
6891
        '%55' => 'U',
6892
        '%56' => 'V',
6893
        '%57' => 'W',
6894
        '%58' => 'X',
6895
        '%59' => 'Y',
6896
        '%5A' => 'Z',
6897
        '%5B' => '[',
6898
        '%5C' => '\\',
6899
        '%5D' => ']',
6900
        '%5E' => '^',
6901
        '%5F' => '_',
6902
        '%60' => '`',
6903
        '%61' => 'a',
6904
        '%62' => 'b',
6905
        '%63' => 'c',
6906
        '%64' => 'd',
6907
        '%65' => 'e',
6908
        '%66' => 'f',
6909
        '%67' => 'g',
6910
        '%68' => 'h',
6911
        '%69' => 'i',
6912
        '%6A' => 'j',
6913
        '%6B' => 'k',
6914
        '%6C' => 'l',
6915
        '%6D' => 'm',
6916
        '%6E' => 'n',
6917
        '%6F' => 'o',
6918
        '%70' => 'p',
6919
        '%71' => 'q',
6920
        '%72' => 'r',
6921
        '%73' => 's',
6922
        '%74' => 't',
6923
        '%75' => 'u',
6924
        '%76' => 'v',
6925
        '%77' => 'w',
6926
        '%78' => 'x',
6927
        '%79' => 'y',
6928
        '%7A' => 'z',
6929
        '%7B' => '{',
6930
        '%7C' => '|',
6931
        '%7D' => '}',
6932
        '%7E' => '~',
6933
        '%7F' => '',
6934
        '%80' => '`',
6935
        '%81' => '',
6936
        '%82' => '‚',
6937
        '%83' => 'ƒ',
6938
        '%84' => '„',
6939
        '%85' => '…',
6940
        '%86' => '†',
6941
        '%87' => '‡',
6942
        '%88' => 'ˆ',
6943
        '%89' => '‰',
6944
        '%8A' => 'Š',
6945
        '%8B' => '‹',
6946
        '%8C' => 'Œ',
6947
        '%8D' => '',
6948
        '%8E' => 'Ž',
6949
        '%8F' => '',
6950
        '%90' => '',
6951
        '%91' => '‘',
6952
        '%92' => '’',
6953
        '%93' => '“',
6954
        '%94' => '”',
6955
        '%95' => '•',
6956
        '%96' => '–',
6957
        '%97' => '—',
6958
        '%98' => '˜',
6959
        '%99' => '™',
6960
        '%9A' => 'š',
6961
        '%9B' => '›',
6962
        '%9C' => 'œ',
6963
        '%9D' => '',
6964
        '%9E' => 'ž',
6965
        '%9F' => 'Ÿ',
6966
        '%A0' => '',
6967
        '%A1' => '¡',
6968
        '%A2' => '¢',
6969
        '%A3' => '£',
6970
        '%A4' => '¤',
6971
        '%A5' => '¥',
6972
        '%A6' => '¦',
6973
        '%A7' => '§',
6974
        '%A8' => '¨',
6975
        '%A9' => '©',
6976
        '%AA' => 'ª',
6977
        '%AB' => '«',
6978
        '%AC' => '¬',
6979
        '%AD' => '',
6980
        '%AE' => '®',
6981
        '%AF' => '¯',
6982
        '%B0' => '°',
6983
        '%B1' => '±',
6984
        '%B2' => '²',
6985
        '%B3' => '³',
6986
        '%B4' => '´',
6987
        '%B5' => 'µ',
6988
        '%B6' => '¶',
6989
        '%B7' => '·',
6990
        '%B8' => '¸',
6991
        '%B9' => '¹',
6992
        '%BA' => 'º',
6993
        '%BB' => '»',
6994
        '%BC' => '¼',
6995
        '%BD' => '½',
6996
        '%BE' => '¾',
6997
        '%BF' => '¿',
6998
        '%C0' => 'À',
6999
        '%C1' => 'Á',
7000
        '%C2' => 'Â',
7001
        '%C3' => 'Ã',
7002
        '%C4' => 'Ä',
7003
        '%C5' => 'Å',
7004
        '%C6' => 'Æ',
7005
        '%C7' => 'Ç',
7006
        '%C8' => 'È',
7007
        '%C9' => 'É',
7008
        '%CA' => 'Ê',
7009
        '%CB' => 'Ë',
7010
        '%CC' => 'Ì',
7011
        '%CD' => 'Í',
7012
        '%CE' => 'Î',
7013
        '%CF' => 'Ï',
7014
        '%D0' => 'Ð',
7015
        '%D1' => 'Ñ',
7016
        '%D2' => 'Ò',
7017
        '%D3' => 'Ó',
7018
        '%D4' => 'Ô',
7019
        '%D5' => 'Õ',
7020
        '%D6' => 'Ö',
7021
        '%D7' => '×',
7022
        '%D8' => 'Ø',
7023
        '%D9' => 'Ù',
7024
        '%DA' => 'Ú',
7025
        '%DB' => 'Û',
7026
        '%DC' => 'Ü',
7027
        '%DD' => 'Ý',
7028
        '%DE' => 'Þ',
7029
        '%DF' => 'ß',
7030
        '%E0' => 'à',
7031
        '%E1' => 'á',
7032
        '%E2' => 'â',
7033
        '%E3' => 'ã',
7034
        '%E4' => 'ä',
7035
        '%E5' => 'å',
7036
        '%E6' => 'æ',
7037
        '%E7' => 'ç',
7038
        '%E8' => 'è',
7039
        '%E9' => 'é',
7040
        '%EA' => 'ê',
7041
        '%EB' => 'ë',
7042
        '%EC' => 'ì',
7043
        '%ED' => 'í',
7044
        '%EE' => 'î',
7045
        '%EF' => 'ï',
7046
        '%F0' => 'ð',
7047
        '%F1' => 'ñ',
7048
        '%F2' => 'ò',
7049
        '%F3' => 'ó',
7050
        '%F4' => 'ô',
7051
        '%F5' => 'õ',
7052
        '%F6' => 'ö',
7053
        '%F7' => '÷',
7054
        '%F8' => 'ø',
7055
        '%F9' => 'ù',
7056
        '%FA' => 'ú',
7057
        '%FB' => 'û',
7058
        '%FC' => 'ü',
7059
        '%FD' => 'ý',
7060
        '%FE' => 'þ',
7061
        '%FF' => 'ÿ',
7062
    );
7063
7064
    return $array;
7065
  }
7066
7067
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalized via "UTF8::to_utf8()", then every key of the internal
   * "$utf8ToWin1252" table is replaced by its value, and the result is handed over to the
   * "utf8_decode()" of the Xml polyfill.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built only once per request from the mapping table
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    if (null === $searchCache) {
      $searchCache = array_keys(self::$utf8ToWin1252);
      $replaceCache = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($searchCache, $replaceCache, $str);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
7096
7097
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "\utf8_encode()" call, every key of the internal "$cp1252ToUtf8" table
   * is replaced by its value - but only if the encoded string contains a "\xC2" byte at all.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built only once per request from the mapping table
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // no "\xC2" byte in the result: skip the replacement step completely
    if (false === strpos($encoded, "\xC2")) {
      return $encoded;
    }

    if (null === $searchCache) {
      $searchCache = array_keys(self::$cp1252ToUtf8);
      $replaceCache = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
7130
7131
  /**
   * Alias for "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7144
7145
  /**
   * Returns the internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
7161
7162
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string has to be shortened, trailing
   * whitespace is removed from the kept part and $strAddOn is appended; otherwise the string is
   * returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer; values below 1 result in an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    $words = (int)$words;

    if ('' === $str || $words < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 .. $words "word + trailing whitespace" groups
    $limitPattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($limitPattern, $str, $matches);

    // only shorten if something matched and the match does not already cover the whole string
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7197
7198
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a single-byte "mask" (one mask-byte per character: ' ' for a
   * space, '?' for any other character, '#' for an existing break), the mask is wrapped with the
   * native "wordwrap()" and the break positions found in the wrapped mask are then applied to
   * the list of the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // step 1: build the list of real characters and the matching mask
    $mask = '';
    $units = array();
    foreach (explode($break, $str) as $segmentIndex => $segment) {

      // every segment after the first one was preceded by a break in the input
      if ($segmentIndex) {
        $units[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $units[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // step 2: let the native function decide where the breaks go
    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    // step 3: replay the break positions on the real characters
    $strReturn = '';
    $unitIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    for (; ;) {
      $breakPos = self::strpos($wrappedMask, '#', $breakPos + 1);
      if (false === $breakPos) {
        break;
      }

      // copy everything in front of the break ...
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $strReturn .= $units[$unitIndex];
        unset($units[$unitIndex]);
        ++$unitIndex;
      }

      // ... and drop the character the break stands for (an old break or a space)
      if ($break === $units[$unitIndex] || ' ' === $units[$unitIndex]) {
        unset($units[$unitIndex]);
        ++$unitIndex;
      }

      $strReturn .= $break;
    }

    // whatever is left belongs to the last line
    return $strReturn . implode('', $units);
  }
7265
7266
  /**
   * Returns the internal list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7275
7276
}
7277