Completed
Push — master ( 3148f6...e0f452 )
by Lars
03:35
created

UTF8::is_binary()   B

Complexity

Conditions 5
Paths 4

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 9
nc 4
nop 1
dl 0
loc 17
ccs 9
cts 9
cp 1
crap 5
rs 8.8571
c 0
b 0
f 0
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
      '’'      => '’',
323
  );
324
325
  /**
326
   * @var array
327
   */
328
  private static $utf8ToWin1252 = array(
329
      "\xe2\x82\xac" => "\x80", // EURO SIGN
330
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
331
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
332
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
333
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
334
      "\xe2\x80\xa0" => "\x86", // DAGGER
335
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
336
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
337
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
338
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
339
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
340
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
341
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
342
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
343
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
344
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
345
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
346
      "\xe2\x80\xa2" => "\x95", // BULLET
347
      "\xe2\x80\x93" => "\x96", // EN DASH
348
      "\xe2\x80\x94" => "\x97", // EM DASH
349
      "\xcb\x9c"     => "\x98", // SMALL TILDE
350
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
351
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
352
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
353
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
354
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
355
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
356
  );
357
358
  /**
359
   * @var array
360
   */
361
  private static $utf8MSWord = array(
362
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
363
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
364
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
365
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
366
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
367
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
368
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
369
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
370
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
371
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
372
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
373
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
374
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
375
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
376
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
377
  );
378
379
  /**
380
   * @var array
381
   */
382
  private static $iconvEncoding = array(
383
      'ANSI_X3.4-1968',
384
      'ANSI_X3.4-1986',
385
      'ASCII',
386
      'CP367',
387
      'IBM367',
388
      'ISO-IR-6',
389
      'ISO646-US',
390
      'ISO_646.IRV:1991',
391
      'US',
392
      'US-ASCII',
393
      'CSASCII',
394
      'UTF-8',
395
      'ISO-10646-UCS-2',
396
      'UCS-2',
397
      'CSUNICODE',
398
      'UCS-2BE',
399
      'UNICODE-1-1',
400
      'UNICODEBIG',
401
      'CSUNICODE11',
402
      'UCS-2LE',
403
      'UNICODELITTLE',
404
      'ISO-10646-UCS-4',
405
      'UCS-4',
406
      'CSUCS4',
407
      'UCS-4BE',
408
      'UCS-4LE',
409
      'UTF-16',
410
      'UTF-16BE',
411
      'UTF-16LE',
412
      'UTF-32',
413
      'UTF-32BE',
414
      'UTF-32LE',
415
      'UNICODE-1-1-UTF-7',
416
      'UTF-7',
417
      'CSUNICODE11UTF7',
418
      'UCS-2-INTERNAL',
419
      'UCS-2-SWAPPED',
420
      'UCS-4-INTERNAL',
421
      'UCS-4-SWAPPED',
422
      'C99',
423
      'JAVA',
424
      'CP819',
425
      'IBM819',
426
      'ISO-8859-1',
427
      'ISO-IR-100',
428
      'ISO8859-1',
429
      'ISO_8859-1',
430
      'ISO_8859-1:1987',
431
      'L1',
432
      'LATIN1',
433
      'CSISOLATIN1',
434
      'ISO-8859-2',
435
      'ISO-IR-101',
436
      'ISO8859-2',
437
      'ISO_8859-2',
438
      'ISO_8859-2:1987',
439
      'L2',
440
      'LATIN2',
441
      'CSISOLATIN2',
442
      'ISO-8859-3',
443
      'ISO-IR-109',
444
      'ISO8859-3',
445
      'ISO_8859-3',
446
      'ISO_8859-3:1988',
447
      'L3',
448
      'LATIN3',
449
      'CSISOLATIN3',
450
      'ISO-8859-4',
451
      'ISO-IR-110',
452
      'ISO8859-4',
453
      'ISO_8859-4',
454
      'ISO_8859-4:1988',
455
      'L4',
456
      'LATIN4',
457
      'CSISOLATIN4',
458
      'CYRILLIC',
459
      'ISO-8859-5',
460
      'ISO-IR-144',
461
      'ISO8859-5',
462
      'ISO_8859-5',
463
      'ISO_8859-5:1988',
464
      'CSISOLATINCYRILLIC',
465
      'ARABIC',
466
      'ASMO-708',
467
      'ECMA-114',
468
      'ISO-8859-6',
469
      'ISO-IR-127',
470
      'ISO8859-6',
471
      'ISO_8859-6',
472
      'ISO_8859-6:1987',
473
      'CSISOLATINARABIC',
474
      'ECMA-118',
475
      'ELOT_928',
476
      'GREEK',
477
      'GREEK8',
478
      'ISO-8859-7',
479
      'ISO-IR-126',
480
      'ISO8859-7',
481
      'ISO_8859-7',
482
      'ISO_8859-7:1987',
483
      'ISO_8859-7:2003',
484
      'CSISOLATINGREEK',
485
      'HEBREW',
486
      'ISO-8859-8',
487
      'ISO-IR-138',
488
      'ISO8859-8',
489
      'ISO_8859-8',
490
      'ISO_8859-8:1988',
491
      'CSISOLATINHEBREW',
492
      'ISO-8859-9',
493
      'ISO-IR-148',
494
      'ISO8859-9',
495
      'ISO_8859-9',
496
      'ISO_8859-9:1989',
497
      'L5',
498
      'LATIN5',
499
      'CSISOLATIN5',
500
      'ISO-8859-10',
501
      'ISO-IR-157',
502
      'ISO8859-10',
503
      'ISO_8859-10',
504
      'ISO_8859-10:1992',
505
      'L6',
506
      'LATIN6',
507
      'CSISOLATIN6',
508
      'ISO-8859-11',
509
      'ISO8859-11',
510
      'ISO_8859-11',
511
      'ISO-8859-13',
512
      'ISO-IR-179',
513
      'ISO8859-13',
514
      'ISO_8859-13',
515
      'L7',
516
      'LATIN7',
517
      'ISO-8859-14',
518
      'ISO-CELTIC',
519
      'ISO-IR-199',
520
      'ISO8859-14',
521
      'ISO_8859-14',
522
      'ISO_8859-14:1998',
523
      'L8',
524
      'LATIN8',
525
      'ISO-8859-15',
526
      'ISO-IR-203',
527
      'ISO8859-15',
528
      'ISO_8859-15',
529
      'ISO_8859-15:1998',
530
      'LATIN-9',
531
      'ISO-8859-16',
532
      'ISO-IR-226',
533
      'ISO8859-16',
534
      'ISO_8859-16',
535
      'ISO_8859-16:2001',
536
      'L10',
537
      'LATIN10',
538
      'KOI8-R',
539
      'CSKOI8R',
540
      'KOI8-U',
541
      'KOI8-RU',
542
      'CP1250',
543
      'MS-EE',
544
      'WINDOWS-1250',
545
      'CP1251',
546
      'MS-CYRL',
547
      'WINDOWS-1251',
548
      'CP1252',
549
      'MS-ANSI',
550
      'WINDOWS-1252',
551
      'CP1253',
552
      'MS-GREEK',
553
      'WINDOWS-1253',
554
      'CP1254',
555
      'MS-TURK',
556
      'WINDOWS-1254',
557
      'CP1255',
558
      'MS-HEBR',
559
      'WINDOWS-1255',
560
      'CP1256',
561
      'MS-ARAB',
562
      'WINDOWS-1256',
563
      'CP1257',
564
      'WINBALTRIM',
565
      'WINDOWS-1257',
566
      'CP1258',
567
      'WINDOWS-1258',
568
      '850',
569
      'CP850',
570
      'IBM850',
571
      'CSPC850MULTILINGUAL',
572
      '862',
573
      'CP862',
574
      'IBM862',
575
      'CSPC862LATINHEBREW',
576
      '866',
577
      'CP866',
578
      'IBM866',
579
      'CSIBM866',
580
      'MAC',
581
      'MACINTOSH',
582
      'MACROMAN',
583
      'CSMACINTOSH',
584
      'MACCENTRALEUROPE',
585
      'MACICELAND',
586
      'MACCROATIAN',
587
      'MACROMANIA',
588
      'MACCYRILLIC',
589
      'MACUKRAINE',
590
      'MACGREEK',
591
      'MACTURKISH',
592
      'MACHEBREW',
593
      'MACARABIC',
594
      'MACTHAI',
595
      'HP-ROMAN8',
596
      'R8',
597
      'ROMAN8',
598
      'CSHPROMAN8',
599
      'NEXTSTEP',
600
      'ARMSCII-8',
601
      'GEORGIAN-ACADEMY',
602
      'GEORGIAN-PS',
603
      'KOI8-T',
604
      'CP154',
605
      'CYRILLIC-ASIAN',
606
      'PT154',
607
      'PTCP154',
608
      'CSPTCP154',
609
      'KZ-1048',
610
      'RK1048',
611
      'STRK1048-2002',
612
      'CSKZ1048',
613
      'MULELAO-1',
614
      'CP1133',
615
      'IBM-CP1133',
616
      'ISO-IR-166',
617
      'TIS-620',
618
      'TIS620',
619
      'TIS620-0',
620
      'TIS620.2529-1',
621
      'TIS620.2533-0',
622
      'TIS620.2533-1',
623
      'CP874',
624
      'WINDOWS-874',
625
      'VISCII',
626
      'VISCII1.1-1',
627
      'CSVISCII',
628
      'TCVN',
629
      'TCVN-5712',
630
      'TCVN5712-1',
631
      'TCVN5712-1:1993',
632
      'ISO-IR-14',
633
      'ISO646-JP',
634
      'JIS_C6220-1969-RO',
635
      'JP',
636
      'CSISO14JISC6220RO',
637
      'JISX0201-1976',
638
      'JIS_X0201',
639
      'X0201',
640
      'CSHALFWIDTHKATAKANA',
641
      'ISO-IR-87',
642
      'JIS0208',
643
      'JIS_C6226-1983',
644
      'JIS_X0208',
645
      'JIS_X0208-1983',
646
      'JIS_X0208-1990',
647
      'X0208',
648
      'CSISO87JISX0208',
649
      'ISO-IR-159',
650
      'JIS_X0212',
651
      'JIS_X0212-1990',
652
      'JIS_X0212.1990-0',
653
      'X0212',
654
      'CSISO159JISX02121990',
655
      'CN',
656
      'GB_1988-80',
657
      'ISO-IR-57',
658
      'ISO646-CN',
659
      'CSISO57GB1988',
660
      'CHINESE',
661
      'GB_2312-80',
662
      'ISO-IR-58',
663
      'CSISO58GB231280',
664
      'CN-GB-ISOIR165',
665
      'ISO-IR-165',
666
      'ISO-IR-149',
667
      'KOREAN',
668
      'KSC_5601',
669
      'KS_C_5601-1987',
670
      'KS_C_5601-1989',
671
      'CSKSC56011987',
672
      'EUC-JP',
673
      'EUCJP',
674
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
675
      'CSEUCPKDFMTJAPANESE',
676
      'MS_KANJI',
677
      'SHIFT-JIS',
678
      'SHIFT_JIS',
679
      'SJIS',
680
      'CSSHIFTJIS',
681
      'CP932',
682
      'ISO-2022-JP',
683
      'CSISO2022JP',
684
      'ISO-2022-JP-1',
685
      'ISO-2022-JP-2',
686
      'CSISO2022JP2',
687
      'CN-GB',
688
      'EUC-CN',
689
      'EUCCN',
690
      'GB2312',
691
      'CSGB2312',
692
      'GBK',
693
      'CP936',
694
      'MS936',
695
      'WINDOWS-936',
696
      'GB18030',
697
      'ISO-2022-CN',
698
      'CSISO2022CN',
699
      'ISO-2022-CN-EXT',
700
      'HZ',
701
      'HZ-GB-2312',
702
      'EUC-TW',
703
      'EUCTW',
704
      'CSEUCTW',
705
      'BIG-5',
706
      'BIG-FIVE',
707
      'BIG5',
708
      'BIGFIVE',
709
      'CN-BIG5',
710
      'CSBIG5',
711
      'CP950',
712
      'BIG5-HKSCS:1999',
713
      'BIG5-HKSCS:2001',
714
      'BIG5-HKSCS',
715
      'BIG5-HKSCS:2004',
716
      'BIG5HKSCS',
717
      'EUC-KR',
718
      'EUCKR',
719
      'CSEUCKR',
720
      'CP949',
721
      'UHC',
722
      'CP1361',
723
      'JOHAB',
724
      'ISO-2022-KR',
725
      'CSISO2022KR',
726
      'CP856',
727
      'CP922',
728
      'CP943',
729
      'CP1046',
730
      'CP1124',
731
      'CP1129',
732
      'CP1161',
733
      'IBM-1161',
734
      'IBM1161',
735
      'CSIBM1161',
736
      'CP1162',
737
      'IBM-1162',
738
      'IBM1162',
739
      'CSIBM1162',
740
      'CP1163',
741
      'IBM-1163',
742
      'IBM1163',
743
      'CSIBM1163',
744
      'DEC-KANJI',
745
      'DEC-HANYU',
746
      '437',
747
      'CP437',
748
      'IBM437',
749
      'CSPC8CODEPAGE437',
750
      'CP737',
751
      'CP775',
752
      'IBM775',
753
      'CSPC775BALTIC',
754
      '852',
755
      'CP852',
756
      'IBM852',
757
      'CSPCP852',
758
      'CP853',
759
      '855',
760
      'CP855',
761
      'IBM855',
762
      'CSIBM855',
763
      '857',
764
      'CP857',
765
      'IBM857',
766
      'CSIBM857',
767
      'CP858',
768
      '860',
769
      'CP860',
770
      'IBM860',
771
      'CSIBM860',
772
      '861',
773
      'CP-IS',
774
      'CP861',
775
      'IBM861',
776
      'CSIBM861',
777
      '863',
778
      'CP863',
779
      'IBM863',
780
      'CSIBM863',
781
      'CP864',
782
      'IBM864',
783
      'CSIBM864',
784
      '865',
785
      'CP865',
786
      'IBM865',
787
      'CSIBM865',
788
      '869',
789
      'CP-GR',
790
      'CP869',
791
      'IBM869',
792
      'CSIBM869',
793
      'CP1125',
794
      'EUC-JISX0213',
795
      'SHIFT_JISX0213',
796
      'ISO-2022-JP-3',
797
      'BIG5-2003',
798
      'ISO-IR-230',
799
      'TDS565',
800
      'ATARI',
801
      'ATARIST',
802
      'RISCOS-LATIN1',
803
  );
804
805
  /**
806
   * @var array
807 1
   */
808
  private static $support = array();
809 1
810 1
  /**
   * Constructor.
   *
   * Creating an instance triggers the environment detection done by
   * UTF8::checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
817
818
  /**
   * Fetch the single character located at a given position, similar to $str[$pos].
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>The character at that position; an empty string if the
   *                input is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $string = (string)$str;
    $index = (int)$pos;

    // there is nothing to return for an empty string or a negative position
    if ($index < 0 || !isset($string[0])) {
      return '';
    }

    return self::substr($string, $index, 1);
  }
841
842
  /**
   * Make sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not return false
   *       is handed back unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string, prefixed with the BOM if it was missing.</p>
   */
  public static function add_bom_to_string($str)
  {
    if (self::string_has_bom($str) !== false) {
      // a BOM was already detected, nothing to prepend
      return $str;
    }

    return self::bom() . $str;
  }
859
860 2
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The digits are read as one big-endian bit sequence. If the number of
   * digits is not a multiple of 8, the sequence is padded with leading zeros,
   * so "1100001" and "01100001" both yield "a".
   *
   * Characters other than "0" and "1" are ignored.
   *
   * @param mixed $bin <p>The binary digits, e.g. "0110100001101001"; the value is cast to string.</p>
   *
   * @return string <p>The decoded bytes, or an empty string if the input contains no binary digit.</p>
   */
  public static function binary_to_str($bin)
  {
    // keep only the binary digits, so stray characters cannot shift the octet boundaries
    $bits = (string)preg_replace('/[^01]/', '', (string)$bin);

    if (!isset($bits[0])) {
      return '';
    }

    // left-pad with zeros up to a whole number of octets
    $missing = (8 - (strlen($bits) % 8)) % 8;
    if ($missing > 0) {
      $bits = str_repeat('0', $missing) . $bits;
    }

    // Decode octet by octet: base_convert() works on a float internally and
    // silently loses precision on long inputs, and it drops leading zero bytes.
    $result = '';
    foreach (str_split($bits, 8) as $octet) {
      $result .= chr(bindec($octet));
    }

    return $result;
  }
875
876 1
  /**
   * Get the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: UTF8::$bom also lists other BOM values, e.g. for UTF-16 and UTF-32.
   *
   * @return string <p>The UTF-8 Byte Order Mark.</p>
   */
  public static function bom()
  {
    return "\xef\xbb\xbf";
  }
887
888 1
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string to run the callback on.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
902
903
  /**
   * Detect once which UTF-8 related features are available and store the
   * results in self::$support.
   *
   * INFO: There is no need to call this manually, it is triggered when needed.
   *       Once the "already_checked_via_portable_utf8" flag is set, further
   *       calls return immediately.
   */
  public static function checkForSupport()
  {
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      // the detection has already run
      return;
    }

    self::$support['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
930 6
931
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * When the encoding is exactly 'UTF-8' and "IntlChar" support was detected,
   * the work is delegated to \IntlChar::chr(). Otherwise the bytes are built
   * manually, converted to the requested encoding via mb_convert_encoding()
   * if needed, and remembered in a static cache.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure (non-integer
   *                     or negative code point) or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // only real integers are accepted (no numeric strings, floats, null, ...)
    if (!is_int($code_point)) {
      return null;
    }

    // a negative value is not a code point; without this guard the manual
    // fallback below would emit a single invalid byte (e.g. "\xff" for -1)
    if ($code_point < 0) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    } elseif (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    // the separator keeps keys unambiguous for encodings that start with a digit (e.g. "850")
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($cache[$cacheKey]) === true) {
      return $cache[$cacheKey];
    }

    // wrap the value into the 21 bit range that the four byte form can carry
    $code_point %= 0x200000;

    if ($code_point < 0x80) {
      // 1 byte: 0xxxxxxx
      $str = chr($code_point);
    } elseif ($code_point < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = chr(0xC0 | ($code_point >> 6)) .
             chr(0x80 | ($code_point & 0x3F));
    } elseif ($code_point < 0x10000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = chr(0xE0 | ($code_point >> 12)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = chr(0xF0 | ($code_point >> 18)) .
             chr(0x80 | (($code_point >> 12) & 0x3F)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $cache[$cacheKey] = $str;

    return $str;
  }
990
991
  /**
   * Run a callback on every character of a string.
   *
   * The string is broken into characters with UTF8::split() and the callback
   * is applied to each of them via array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1005
1006 2
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The byte length of each character; an empty array for empty input.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // nothing to measure
    if ($str === '') {
      return array();
    }

    $chars = self::split($str);

    // "strlen" counts bytes, so every entry is the encoded size of one character
    return array_map('strlen', $chars);
  }
1028 2
1029
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 to 4); the payload
   * bits of every following byte are appended, 6 bits at a time.
   *
   * Empty input and truncated sequences no longer read undefined string
   * offsets: a missing byte is handed to UTF8::ord() as an empty string,
   * which is the value PHP substituted (together with an
   * "Uninitialized string offset" notice/warning) before.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // never index into an empty string
    $code = self::ord(isset($char[0]) ? $char[0] : '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx (an absent byte of a truncated sequence is passed on as '')
      $byte = isset($char[$i - 1]) ? $char[$i - 1] : '';
      $code = ($code << 6) + (self::ord($byte) & ~0x80);
    }

    return $code;
  }
1068
1069
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed to UTF8::int_to_hex(), "U+" by default.</p>
   *
   * @return string <p>The code point encoded as U+xxxx</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    // the literal entity "&#0;" is treated like an empty character
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
1085
1086 44
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the character.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1099
1100 44
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from UTF8::split(); the line ending is only placed
   * between two chunks, so the result does not end with it.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1113
1114 4
  /**
   * Strips every byte that is not part of a UTF-8 sequence from a string and
   * optionally applies further normalizations.
   *
   * Always performed: the byte filter, removal of the diamond question mark and
   * removal of invisible characters. The remaining steps are opt-in.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    //
    // Group 1 captures runs of well-formed sequences and is kept by the
    // replacement; the two single-byte groups match stray bytes, which are dropped.
    // NOTE(review): preg_replace() yields null on a PCRE error and that value is
    // passed on unchecked — confirm the helpers below cope with it.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = preg_replace($pattern, '$1', $str);

    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // optional steps, each one enabled only by a strict boolean true
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1162
1163 1
  /**
   * Repairs the UTF-8 encoding of a string and then cleans it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() is called with:
    //  - $remove_bom = true              => remove BOM
    //  - $normalize_whitespace = true    => normalize whitespace chars ...
    //  - $normalize_msword = false
    //  - $keep_non_breaking_space = true => ... but keep non-breaking-spaces
    // and always removes non UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    return (string)self::clean($fixed, true, true, false, true);
  }
1190
1191
  /**
   * Turns a string, or an array of characters, into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a string is split into its characters first, arrays are used as they are
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    // second pass: format every integer through UTF8::int_to_hex()
    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $codePoints);
  }
1228
1229 11
  /**
   * Counts how often every character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // chunks of length 1, i.e. one entry per character
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1242
1243 11
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric entity ("&#<int>;") which is then
   * decoded by UTF8::html_entity_decode().
   *
   * @param int $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1260
1261
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged when the string or the encoding is empty,
   * when the current encoding cannot be detected, when ($force !== true and)
   * the detected encoding already equals the target, or when the generic
   * conversion does not produce a non-empty string.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Nothing to do when the detection gave no usable result, or when the
    // string already is in the requested encoding and no re-encoding is forced.
    if (
        !$encodingDetected
        ||
        (
            $force !== true
            &&
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    // target UTF-8: handled by UTF8::to_utf8()
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: handled by UTF8::to_iso8859()
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    // every other combination goes through "mb_convert_encoding()"
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Accept every non-empty string result. A plain truthiness test would
    // reject the valid result "0" and hand back the unconverted input.
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1351
1352
  /**
1353
   * Reads entire file into a string.
1354
   *
1355
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1356
   *
1357
   * @link http://php.net/manual/en/function.file-get-contents.php
1358
   *
1359
   * @param string        $filename      <p>
1360
   *                                     Name of the file to read.
1361
   *                                     </p>
1362 2
   * @param int|null      $flags         [optional] <p>
1363
   *                                     Prior to PHP 6, this parameter is called
1364
   *                                     use_include_path and is a bool.
1365 2
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1366 2
   *                                     to trigger include path
1367
   *                                     search.
1368 2
   *                                     </p>
1369 2
   *                                     <p>
1370
   *                                     The value of flags can be any combination of
1371
   *                                     the following flags (with some restrictions), joined with the
1372
   *                                     binary OR (|)
1373 2
   *                                     operator.
1374 2
   *                                     </p>
1375
   *                                     <p>
1376 2
   *                                     <table>
1377 2
   *                                     Available flags
1378
   *                                     <tr valign="top">
1379 2
   *                                     <td>Flag</td>
1380 1
   *                                     <td>Description</td>
1381 1
   *                                     </tr>
1382 2
   *                                     <tr valign="top">
1383
   *                                     <td>
1384
   *                                     FILE_USE_INCLUDE_PATH
1385
   *                                     </td>
1386 2
   *                                     <td>
1387 1
   *                                     Search for filename in the include directory.
1388
   *                                     See include_path for more
1389
   *                                     information.
1390 1
   *                                     </td>
1391 1
   *                                     </tr>
1392 1
   *                                     <tr valign="top">
1393 1
   *                                     <td>
1394
   *                                     FILE_TEXT
1395 1
   *                                     </td>
1396
   *                                     <td>
1397
   *                                     As of PHP 6, the default encoding of the read
1398
   *                                     data is UTF-8. You can specify a different encoding by creating a
1399
   *                                     custom context or by changing the default using
1400
   *                                     stream_default_encoding. This flag cannot be
1401
   *                                     used with FILE_BINARY.
1402
   *                                     </td>
1403
   *                                     </tr>
1404
   *                                     <tr valign="top">
1405 1
   *                                     <td>
1406
   *                                     FILE_BINARY
1407 1
   *                                     </td>
1408
   *                                     <td>
1409
   *                                     With this flag, the file is read in binary mode. This is the default
1410
   *                                     setting and cannot be used with FILE_TEXT.
1411
   *                                     </td>
1412
   *                                     </tr>
1413
   *                                     </table>
1414
   *                                     </p>
1415
   * @param resource|null $context       [optional] <p>
1416
   *                                     A valid context resource created with
1417
   *                                     stream_context_create. If you don't need to use a
1418
   *                                     custom context, you can skip this parameter by passing null.
1419 9
   *                                     </p>
1420
   * @param int|null      $offset        [optional] <p>
1421 9
   *                                     The offset where the reading starts.
1422 9
   *                                     </p>
1423 3
   * @param int|null      $maxlen        [optional] <p>
1424
   *                                     Maximum length of data read. The default is to read until end
1425 3
   *                                     of file is reached.
1426 3
   *                                     </p>
1427 3
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1428 9
   *
1429 2
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1430 2
   *                                     or pdf, because they used non default utf-8 chars</p>
1431 2
   *
1432 2
   * @return string|false <p>The function returns the read data or <strong>false</strong> on failure.</p>
1433 9
   */
1434
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING rewrites the file name and is reported
    // as deprecated by newer PHP versions — confirm this sanitizing is still wanted.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // without a caller-supplied context, apply the timeout to "http" streams
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function expects a bool and an int here. Cast the "null"
    // defaults explicitly (null => false / 0, the values PHP coerced them to
    // anyway) instead of passing null to non-nullable parameters.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    // inside this method the unqualified name refers to PHP's native function;
    // the length argument is only passed on when it is a real integer
    if (is_int($maxlen) === true) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // $force = false: only convert when the detected encoding differs
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1469
1470
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with PHP's native file_get_contents() and the
   * content is handed to UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // an unreadable file has no detectable BOM; do not pass "false" on as content
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1481
1482
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are walked recursively (element by element / property by
   * property), strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to the "Normalizer" class.</p>
   * @param string $leading_combining  <p>Prefix put in front of strings that start with a
   *                                   combining mark (\p{Mn}).</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // only strings that are not reported as ASCII get the treatment below
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, so that the isset($n[0]) check below passes
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // without a usable normalizer result, re-encode the string as UTF-8 instead
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1543 1
1544 1
  /**
1545 1
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1546
   *
1547
   * Gets a specific external variable by name and optionally filters it
1548 1
   *
1549
   * @link  http://php.net/manual/en/function.filter-input.php
1550
   *
1551
   * @param int    $type          <p>
1552
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1553
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1554
   *                              <b>INPUT_ENV</b>.
1555
   *                              </p>
1556
   * @param string $variable_name <p>
1557
   *                              Name of a variable to get.
1558
   *                              </p>
1559 1
   * @param int    $filter        [optional] <p>
1560
   *                              The ID of the filter to apply. The
1561 1
   *                              manual page lists the available filters.
1562
   *                              </p>
1563
   * @param mixed  $options       [optional] <p>
1564
   *                              Associative array of options or bitwise disjunction of flags. If filter
1565
   *                              accepts options, flags can be provided in "flags" field of array.
1566
   *                              </p>
1567
   *
1568
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1569
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1570
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1571
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1572
   * @since 5.2.0
1573
   */
1574 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded to the native function when the caller
    // really supplied a fourth argument
    if (func_num_args() >= 4) {
      $raw = filter_input($type, $variable_name, $filter, $options);
    } else {
      $raw = filter_input($type, $variable_name, $filter);
    }

    // run the native result through UTF8::filter()
    return self::filter($raw);
  }
1584 7
1585 2
  /**
1586
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1587
   *
1588 7
   * Gets external variables and optionally filters them
1589 1
   *
1590 1
   * @link  http://php.net/manual/en/function.filter-input-array.php
1591 1
   *
1592
   * @param int   $type       <p>
1593 7
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1594
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1595
   *                          <b>INPUT_ENV</b>.
1596
   *                          </p>
1597
   * @param mixed $definition [optional] <p>
1598
   *                          An array defining the arguments. A valid key is a string
1599
   *                          containing a variable name and a valid value is either a filter type, or an array
1600
   *                          optionally specifying the filter, flags and options. If the value is an
1601
   *                          array, valid keys are filter which specifies the
1602
   *                          filter type,
1603 1
   *                          flags which specifies any flags that apply to the
1604
   *                          filter, and options which specifies any options that
1605 1
   *                          apply to the filter. See the example below for a better understanding.
1606
   *                          </p>
1607 1
   *                          <p>
1608
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1609
   *                          input array are filtered by this filter.
1610 1
   *                          </p>
1611 1
   * @param bool  $add_empty  [optional] <p>
1612
   *                          Add missing keys as <b>NULL</b> to the return value.
1613 1
   *                          </p>
1614
   *
1615
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1616 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1617 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1618 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1619 1
   * fails.
1620 1
   * @since 5.2.0
1621
   */
1622 1 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with only $type given, the native function is called without the
    // optional arguments, otherwise both of them are forwarded
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    // run the native result through UTF8::filter()
    return self::filter($values);
  }
1632 1
1633
  /**
1634 1
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1635
   *
1636
   * Filters a variable with a specified filter
1637
   *
1638 1
   * @link  http://php.net/manual/en/function.filter-var.php
1639
   *
1640
   * @param mixed $variable <p>
1641
   *                        Value to filter.
1642
   *                        </p>
1643
   * @param int   $filter   [optional] <p>
1644
   *                        The ID of the filter to apply. The
1645
   *                        manual page lists the available filters.
1646
   *                        </p>
1647
   * @param mixed $options  [optional] <p>
1648
   *                        Associative array of options or bitwise disjunction of flags. If filter
1649
   *                        accepts options, flags can be provided in "flags" field of array. For
1650
   *                        the "callback" filter, callable type should be passed. The
1651
   *                        callback must accept one argument, the value to be filtered, and return
1652
   *                        the value after filtering/sanitizing it.
1653
   *                        </p>
1654 1
   *                        <p>
1655
   *                        <code>
1656 1
   *                        // for filters that accept options, use this format
1657 1
   *                        $options = array(
1658
   *                        'options' => array(
1659
   *                        'default' => 3, // value to return if the filter fails
1660 1
   *                        // other options here
1661
   *                        'min_range' => 0
1662 1
   *                        ),
1663 1
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1664 1
   *                        );
1665 1
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1666 1
   *                        // for filter that only accept flags, you can pass them directly
1667 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1668 1
   *                        // for filter that only accept flags, you can also pass as an array
1669 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1670 1
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1671 1
   *                        // callback validate filter
1672 1
   *                        function foo($value)
1673
   *                        {
1674
   *                        // Expected format: Surname, GivenNames
1675
   *                        if (strpos($value, ", ") === false) return false;
1676
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1677
   *                        $empty = (empty($surname) || empty($givennames));
1678
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1679
   *                        if ($empty || $notstrings) {
1680
   *                        return false;
1681
   *                        } else {
1682
   *                        return $value;
1683
   *                        }
1684
   *                        }
1685
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1686
   *                        </code>
1687
   *                        </p>
1688
   *
1689
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1690
   * @since 5.2.0
1691
   */
1692 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native filter_var() only when the caller
    // actually passed a third argument; otherwise call it with two arguments.
    $filtered = func_num_args() >= 3
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // The native result is handed to self::filter() before being returned.
    return self::filter($filtered);
  }
1702
1703
  /**
1704
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1705
   *
1706
   * Gets multiple variables and optionally filters them
1707
   *
1708
   * @link  http://php.net/manual/en/function.filter-var-array.php
1709
   *
1710
   * @param array $data       <p>
1711
   *                          An array with string keys containing the data to filter.
1712
   *                          </p>
1713
   * @param mixed $definition [optional] <p>
1714
   *                          An array defining the arguments. A valid key is a string
1715
   *                          containing a variable name and a valid value is either a
1716
   *                          filter type, or an
1717
   *                          array optionally specifying the filter, flags and options.
1718
   *                          If the value is an array, valid keys are filter
1719
   *                          which specifies the filter type,
1720
   *                          flags which specifies any flags that apply to the
1721
   *                          filter, and options which specifies any options that
1722
   *                          apply to the filter. See the example below for a better understanding.
1723
   *                          </p>
1724
   *                          <p>
1725
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1726
   *                          input array are filtered by this filter.
1727
   *                          </p>
1728
   * @param bool  $add_empty  [optional] <p>
1729
   *                          Add missing keys as <b>NULL</b> to the return value.
1730
   *                          </p>
1731
   *
1732
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1733
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1734
   * the variable is not set.
1735
   * @since 5.2.0
1736
   */
1737 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without
    // $definition / $add_empty; otherwise all three values are forwarded.
    $filtered = func_num_args() >= 2
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    // The native result is handed to self::filter() before being returned.
    return self::filter($filtered);
  }
1747
1748
  /**
1749
   * Check if the number of unicode characters is not more than the specified integer.
1750
   *
1751
   * @param string $str      The original string to be checked.
1752 1
   * @param int    $box_size The size in number of chars to be checked against string.
1753
   *
1754 1
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1755 1
   */
1756
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared against the box size.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1760
1761
  /**
1762
   * Try to fix simple broken UTF-8 strings.
1763
   *
1764
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1765
   *
1766
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
1767
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1768
   * See: http://en.wikipedia.org/wiki/Windows-1252
1769
   *
1770
   * @param string $str <p>The input string</p>
1771
   *
1772 1
   * @return string
1773
   */
1774 1 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // The search / replace lists are derived from self::$brokenUtf8ToUtf8 on
    // the first call and kept in a static local for later calls.
    static $REPLACEMENT_CACHE = null;

    if ($REPLACEMENT_CACHE === null) {
      $REPLACEMENT_CACHE = array(
          'search'  => array_keys(self::$brokenUtf8ToUtf8),
          'replace' => array_values(self::$brokenUtf8ToUtf8),
      );
    }

    return str_replace($REPLACEMENT_CACHE['search'], $REPLACEMENT_CACHE['replace'], $input);
  }
1793 1
1794
  /**
1795
   * Fix a double (or multiple) encoded UTF8 string.
1796 1
   *
1797
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1798
   *
1799
   * @return mixed
1800
   */
1801
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element (recursively), keeping the keys.
    if (is_array($str)) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Run the decode -> to_utf8 round trip again and again until one pass
    // leaves the value unchanged. An empty string never enters the loop.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1825
1826 2
  /**
1827
   * Get the text direction ('LTR' or 'RTL') of a specific character.
1828
   *
1829 2
   * @param string $char
1830
   *
1831 2
   * @return string <p>'RTL' or 'LTR'</p>
1832 2
   */
1833 1
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      // Numeric direction code as returned by the "IntlChar"-Class.
      $direction = \IntlChar::charDirection($char);

      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // Any other code falls through to the code point tables below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of 0x5be - 0x10b7f is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Single code points that are right-to-left (compared strictly).
    static $RTL_CODE_POINTS = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // Inclusive [first, last] code point ranges that are right-to-left.
    static $RTL_RANGES = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (in_array($c, $RTL_CODE_POINTS, true)) {
      return 'RTL';
    }

    foreach ($RTL_RANGES as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1945 9
1946 9
  /**
1947 6
   * Get data from "/data/*.php".
1948
   *
1949
   * @param string $file
1950 9
   *
1951 2
   * @return bool|string|array|int <p>Will return false on error.</p>
1952 2
   */
1953
  private static function getData($file)
  {
    // The data files live next to this class as "data/<name>.php".
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    // The included file's return value is handed back to the caller.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1963 9
1964
  /**
1965 9
   * alias for "UTF8::string_has_bom()"
1966 9
   *
1967
   * @see UTF8::string_has_bom()
1968 7
   *
1969
   * @param string $str
1970 7
   *
1971 6
   * @return bool
1972
   *
1973 4
   * @deprecated
1974
   */
1975 9
  public static function hasBom($str)
  {
    // Deprecated alias: the check itself is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1979
1980 9
  /**
1981 9
   * Converts a hexadecimal-value into an UTF-8 character.
1982 9
   *
1983
   * @param string $hexdec <p>The hexadecimal value.</p>
1984 9
   *
1985
   * @return string|false <p>One single UTF-8 character.</p>
1986 9
   */
1987
  public static function hex_to_chr($hexdec)
  {
    // Convert the hexadecimal string to its decimal value first ...
    $decimal = hexdec($hexdec);

    // ... then let decimal_to_chr() build the character from it.
    return self::decimal_to_chr($decimal);
  }
1991
1992
  /**
1993
   * Converts hexadecimal U+xxxx code point representation to integer.
1994
   *
1995
   * INFO: opposite to UTF8::int_to_hex()
1996
   *
1997
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
1998
   *
1999
   * @return int|false <p>The code point, or false on failure.</p>
2000
   */
2001
  public static function hex_to_int($hexdec)
  {
    // Falsy input ('', null, '0', 0, ...) can never satisfy the 4-6 digit
    // requirement below, so it is rejected right away.
    if (!$hexdec) {
      return false;
    }

    // Accepted forms: "\uXXXX", "U+XXXX" or bare "XXXX", with 4 to 6 digits.
    // Only real hexadecimal digits are allowed: a wider letter class would let
    // input such as "wxyz" match and intval() would then silently return 0
    // instead of signalling failure.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2013
2014
  /**
2015
   * alias for "UTF8::html_entity_decode()"
2016
   *
2017
   * @see UTF8::html_entity_decode()
2018
   *
2019
   * @param string $str
2020
   * @param int    $flags
2021
   * @param string $encoding
2022
   *
2023
   * @return string
2024
   */
2025
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Pure alias: all three arguments go to html_entity_decode() untouched.
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2029
2030
  /**
2031
   * Converts a UTF-8 string to a series of HTML numbered entities.
2032
   *
2033
   * INFO: opposite to UTF8::html_decode()
2034
   *
2035
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2036
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2037
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2038
   *
2039
   * @return string <p>HTML numbered entities.</p>
2040
   */
2041
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // First code point to encode: 0x80 leaves the ASCII range untouched,
      // 0x00 encodes ASCII characters as well.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A conversion map consists of groups of exactly four integers
      // (start, end, offset, mask). A stray fifth element is rejected by
      // PHP 8 with a ValueError, so exactly one group is passed here.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2079
2080
  /**
2081
   * UTF-8 version of html_entity_decode()
2082
   *
2083
   * The reason we are not using html_entity_decode() by itself is because
2084
   * while it is not technically correct to leave out the semicolon
2085
   * at the end of an entity most browsers will still interpret the entity
2086
   * correctly. html_entity_decode() does not convert entities without
2087
   * semicolons, so we are left with our own little solution here. Bummer.
2088
   *
2089
   * Convert all HTML entities to their applicable characters
2090
   *
2091
   * INFO: opposite to UTF8::html_encode()
2092
   *
2093
   * @link http://php.net/manual/en/function.html-entity-decode.php
2094 2
   *
2095
   * @param string $str      <p>
2096 2
   *                         The input string.
2097 1
   *                         </p>
2098 1
   * @param int    $flags    [optional] <p>
2099
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2100 2
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2101
   *                         <table>
2102 2
   *                         Available <i>flags</i> constants
2103 1
   *                         <tr valign="top">
2104
   *                         <td>Constant Name</td>
2105
   *                         <td>Description</td>
2106 2
   *                         </tr>
2107 2
   *                         <tr valign="top">
2108 2
   *                         <td><b>ENT_COMPAT</b></td>
2109 2
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2110 2
   *                         </tr>
2111 1
   *                         <tr valign="top">
2112
   *                         <td><b>ENT_QUOTES</b></td>
2113 1
   *                         <td>Will convert both double and single quotes.</td>
2114 1
   *                         </tr>
2115 1
   *                         <tr valign="top">
2116 1
   *                         <td><b>ENT_NOQUOTES</b></td>
2117 1
   *                         <td>Will leave both double and single quotes unconverted.</td>
2118 2
   *                         </tr>
2119
   *                         <tr valign="top">
2120 2
   *                         <td><b>ENT_HTML401</b></td>
2121
   *                         <td>
2122
   *                         Handle code as HTML 4.01.
2123
   *                         </td>
2124
   *                         </tr>
2125
   *                         <tr valign="top">
2126
   *                         <td><b>ENT_XML1</b></td>
2127
   *                         <td>
2128
   *                         Handle code as XML 1.
2129
   *                         </td>
2130
   *                         </tr>
2131
   *                         <tr valign="top">
2132
   *                         <td><b>ENT_XHTML</b></td>
2133
   *                         <td>
2134
   *                         Handle code as XHTML.
2135
   *                         </td>
2136
   *                         </tr>
2137
   *                         <tr valign="top">
2138
   *                         <td><b>ENT_HTML5</b></td>
2139
   *                         <td>
2140
   *                         Handle code as HTML 5.
2141
   *                         </td>
2142
   *                         </tr>
2143
   *                         </table>
2144
   *                         </p>
2145
   * @param string $encoding [optional] <p>Encoding to use.</p>
2146
   *
2147
   * @return string <p>The decoded string.</p>
2148
   */
2149
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Fewer than four bytes: returned unchanged (e.g. "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Without any "&" there is nothing that could be an entity.
    if (strpos($str, '&') === false) {
      return $str;
    }

    // Neither a numeric entity prefix nor a terminating ";" is present.
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      // ENT_HTML5 is only used when Bootup reports PHP >= 5.4.
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // Converts one decimal entity via mbstring, but keeps the entity as it is
    // when the result would be a bare double or single quote.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $converted = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($converted === '"' || $converted === "'") {
        return $matches[0];
      }

      return $converted;
    };

    // Repeat until a full pass no longer changes the string, so that nested
    // (multiply encoded) entities are resolved as well.
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // Append the missing ";" to numeric entities that lack it ...
      $terminated = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      // ... and let the native function decode whatever is left.
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2214
2215
  /**
2216
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2217
   *
2218
   * @link http://php.net/manual/en/function.htmlentities.php
2219
   *
2220
   * @param string $str           <p>
2221
   *                              The input string.
2222
   *                              </p>
2223
   * @param int    $flags         [optional] <p>
2224
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2225
   *                              invalid code unit sequences and the used document type. The default is
2226
   *                              ENT_COMPAT | ENT_HTML401.
2227
   *                              <table>
2228
   *                              Available <i>flags</i> constants
2229
   *                              <tr valign="top">
2230
   *                              <td>Constant Name</td>
2231
   *                              <td>Description</td>
2232 1
   *                              </tr>
2233
   *                              <tr valign="top">
2234 1
   *                              <td><b>ENT_COMPAT</b></td>
2235
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2236
   *                              </tr>
2237
   *                              <tr valign="top">
2238 1
   *                              <td><b>ENT_QUOTES</b></td>
2239
   *                              <td>Will convert both double and single quotes.</td>
2240
   *                              </tr>
2241
   *                              <tr valign="top">
2242
   *                              <td><b>ENT_NOQUOTES</b></td>
2243
   *                              <td>Will leave both double and single quotes unconverted.</td>
2244
   *                              </tr>
2245
   *                              <tr valign="top">
2246 1
   *                              <td><b>ENT_IGNORE</b></td>
2247
   *                              <td>
2248 1
   *                              Silently discard invalid code unit sequences instead of returning
2249
   *                              an empty string. Using this flag is discouraged as it
2250
   *                              may have security implications.
2251
   *                              </td>
2252
   *                              </tr>
2253
   *                              <tr valign="top">
2254
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2255
   *                              <td>
2256
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2257
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2258
   *                              </td>
2259
   *                              </tr>
2260
   *                              <tr valign="top">
2261 3
   *                              <td><b>ENT_DISALLOWED</b></td>
2262
   *                              <td>
2263 3
   *                              Replace invalid code points for the given document type with a
2264 3
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2265
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2266 3
   *                              instance, to ensure the well-formedness of XML documents with
2267
   *                              embedded external content.
2268 3
   *                              </td>
2269
   *                              </tr>
2270
   *                              <tr valign="top">
2271
   *                              <td><b>ENT_HTML401</b></td>
2272
   *                              <td>
2273
   *                              Handle code as HTML 4.01.
2274
   *                              </td>
2275
   *                              </tr>
2276
   *                              <tr valign="top">
2277
   *                              <td><b>ENT_XML1</b></td>
2278
   *                              <td>
2279 1
   *                              Handle code as XML 1.
2280
   *                              </td>
2281 1
   *                              </tr>
2282
   *                              <tr valign="top">
2283
   *                              <td><b>ENT_XHTML</b></td>
2284
   *                              <td>
2285
   *                              Handle code as XHTML.
2286
   *                              </td>
2287
   *                              </tr>
2288
   *                              <tr valign="top">
2289 2
   *                              <td><b>ENT_HTML5</b></td>
2290
   *                              <td>
2291 2
   *                              Handle code as HTML 5.
2292
   *                              </td>
2293
   *                              </tr>
2294
   *                              </table>
2295
   *                              </p>
2296
   * @param string $encoding      [optional] <p>
2297
   *                              Like <b>htmlspecialchars</b>,
2298
   *                              <b>htmlentities</b> takes an optional third argument
2299
   *                              <i>encoding</i> which defines encoding used in
2300
   *                              conversion.
2301
   *                              Although this argument is technically optional, you are highly
2302
   *                              encouraged to specify the correct value for your code.
2303 2
   *                              </p>
2304
   * @param bool   $double_encode [optional] <p>
2305 2
   *                              When <i>double_encode</i> is turned off PHP will not
2306
   *                              encode existing html entities. The default is to convert everything.
2307
   *                              </p>
2308
   *
2309
   *
2310
   * @return string the encoded string.
2311
   * </p>
2312
   * <p>
2313
   * If the input <i>string</i> contains an invalid code unit
2314
   * sequence within the given <i>encoding</i> an empty string
2315
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2316
   * <b>ENT_SUBSTITUTE</b> flags are set.
2317 1
   */
2318
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    // For any encoding other than UTF-8 the native result is returned as it is.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes together with
    // its numbered entity, then replace them all in one go.
    $needles = array();
    $entities = array();

    foreach (self::chr_size_list($encoded) as $position => $size) {
      if ($size >= 3) {
        $char = self::access($encoded, $position);

        // each character only needs to be encoded once
        if (!isset($entities[$char])) {
          $needles[$char] = $char;
          $entities[$char] = self::html_encode($char);
        }
      }
    }

    return str_replace($needles, $entities, $encoded);
  }
2346
2347
  /**
2348
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2349
   *
2350
   * INFO: Take a look at "UTF8::htmlentities()"
2351
   *
2352
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2353
   *
2354
   * @param string $str           <p>
2355
   *                              The string being converted.
2356
   *                              </p>
2357
   * @param int    $flags         [optional] <p>
2358
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2359 1
   *                              invalid code unit sequences and the used document type. The default is
2360
   *                              ENT_COMPAT | ENT_HTML401.
2361 1
   *                              <table>
2362
   *                              Available <i>flags</i> constants
2363
   *                              <tr valign="top">
2364
   *                              <td>Constant Name</td>
2365
   *                              <td>Description</td>
2366
   *                              </tr>
2367
   *                              <tr valign="top">
2368
   *                              <td><b>ENT_COMPAT</b></td>
2369
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2370
   *                              </tr>
2371
   *                              <tr valign="top">
2372
   *                              <td><b>ENT_QUOTES</b></td>
2373
   *                              <td>Will convert both double and single quotes.</td>
2374
   *                              </tr>
2375
   *                              <tr valign="top">
2376
   *                              <td><b>ENT_NOQUOTES</b></td>
2377
   *                              <td>Will leave both double and single quotes unconverted.</td>
2378
   *                              </tr>
2379
   *                              <tr valign="top">
2380
   *                              <td><b>ENT_IGNORE</b></td>
2381
   *                              <td>
2382
   *                              Silently discard invalid code unit sequences instead of returning
2383
   *                              an empty string. Using this flag is discouraged as it
2384
   *                              may have security implications.
2385
   *                              </td>
2386
   *                              </tr>
2387 1
   *                              <tr valign="top">
2388
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2389 1
   *                              <td>
2390
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2391
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2392
   *                              </td>
2393
   *                              </tr>
2394
   *                              <tr valign="top">
2395
   *                              <td><b>ENT_DISALLOWED</b></td>
2396
   *                              <td>
2397
   *                              Replace invalid code points for the given document type with a
2398
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2399
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2400
   *                              instance, to ensure the well-formedness of XML documents with
2401 1
   *                              embedded external content.
2402
   *                              </td>
2403 1
   *                              </tr>
2404
   *                              <tr valign="top">
2405
   *                              <td><b>ENT_HTML401</b></td>
2406
   *                              <td>
2407
   *                              Handle code as HTML 4.01.
2408
   *                              </td>
2409
   *                              </tr>
2410
   *                              <tr valign="top">
2411
   *                              <td><b>ENT_XML1</b></td>
2412
   *                              <td>
2413
   *                              Handle code as XML 1.
2414
   *                              </td>
2415
   *                              </tr>
2416 16
   *                              <tr valign="top">
2417
   *                              <td><b>ENT_XHTML</b></td>
2418 16
   *                              <td>
2419
   *                              Handle code as XHTML.
2420
   *                              </td>
2421
   *                              </tr>
2422
   *                              <tr valign="top">
2423
   *                              <td><b>ENT_HTML5</b></td>
2424
   *                              <td>
2425
   *                              Handle code as HTML 5.
2426
   *                              </td>
2427
   *                              </tr>
2428
   *                              </table>
2429
   *                              </p>
2430
   * @param string $encoding      [optional] <p>
2431 28
   *                              Defines encoding used in conversion.
2432
   *                              </p>
2433 28
   *                              <p>
2434
   *                              For the purposes of this function, the encodings
2435 28
   *                              ISO-8859-1, ISO-8859-15,
2436 5
   *                              UTF-8, cp866,
2437
   *                              cp1251, cp1252, and
2438
   *                              KOI8-R are effectively equivalent, provided the
2439 28
   *                              <i>string</i> itself is valid for the encoding, as
2440
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2441
   *                              the same positions in all of these encodings.
2442
   *                              </p>
2443
   * @param bool   $double_encode [optional] <p>
2444
   *                              When <i>double_encode</i> is turned off PHP will not
2445
   *                              encode existing html entities, the default is to convert everything.
2446
   *                              </p>
2447
   *
2448
   * @return string The converted string.
2449 1
   * </p>
2450
   * <p>
2451 1
   * If the input <i>string</i> contains an invalid code unit
2452
   * sequence within the given <i>encoding</i> an empty string
2453 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2454 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2455
   */
2456
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The literal 'UTF-8' is used as-is; every other name is normalized first.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2464
2465
  /**
   * Checks whether iconv is available on the server.
   *
   * Side effect: when Bootup::is_php('5.6') is false and "iconv_set_encoding()" exists,
   * the iconv input-, output- and internal-encoding are set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Guard on the function itself: without it the calls below are a fatal
    // "undefined function" error on systems that lack iconv.
    if (!Bootup::is_php('5.6') && function_exists('iconv_set_encoding')) {
      // INFO: "iconv_set_encoding" is deprecated since PHP 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2485 15
2486
  /**
   * Alias of "UTF8::decimal_to_chr()": the argument is forwarded unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param int $int
   *
   * @return string
   */
  public static function int_to_chr($int)
  {
    return self::decimal_to_chr($int);
  }
2499
2500
  /**
   * Converts an integer to its hexadecimal "U+xxxx" code point representation.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to convert; only values made up solely of decimal digits are accepted.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
   *
   * @return string <p>The prefix followed by at least four lower-case hex digits, or an empty string on failure.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain sequence of decimal digits is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Zero-pad on the left up to four digits; longer values stay untouched.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2522
2523
  /**
   * Checks whether intl-char is available on the server.
   *
   * Requires both Bootup::is_php('7.0') to be true and the class "IntlChar" to exist.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    // Version gate first, so the class lookup is skipped when it fails.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2532
2533
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('intl');
  }
2542
2543
  /**
   * Deprecated alias of "UTF8::is_ascii()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str
   *
   * @return boolean
   *
   * @deprecated use "UTF8::is_ascii()" instead
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2558
2559
  /**
   * Deprecated alias of "UTF8::is_base64()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_base64()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated use "UTF8::is_base64()" instead
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2574 1
2575 1
  /**
   * Deprecated alias of "UTF8::is_binary()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_binary()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated use "UTF8::is_binary()" instead
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2590
2591
  /**
   * Deprecated alias of "UTF8::is_bom()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr
   *
   * @return boolean
   *
   * @deprecated use "UTF8::is_bom()" instead
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2606 4
2607 4
  /**
   * Deprecated alias of "UTF8::is_html()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_html()
   *
   * @param string $str
   *
   * @return boolean
   *
   * @deprecated use "UTF8::is_html()" instead
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2622 4
2623 3
  /**
   * Deprecated alias of "UTF8::is_json()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_json()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated use "UTF8::is_json()" instead
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2638
2639 3
  /**
   * Deprecated alias of "UTF8::is_utf16()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   *
   * @deprecated use "UTF8::is_utf16()" instead
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2654
2655 3
  /**
   * Deprecated alias of "UTF8::is_utf32()"; the argument is forwarded unchanged.
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   *
   * @deprecated use "UTF8::is_utf32()" instead
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2670 3
2671 3
  /**
   * Deprecated alias of "UTF8::is_utf8()"; both arguments are forwarded unchanged.
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str
   * @param bool   $strict
   *
   * @return bool
   *
   * @deprecated use "UTF8::is_utf8()" instead
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2687 3
2688
  /**
   * Checks if a string is 7 bit ASCII, i.e. contains no byte in the range 0x80-0xFF.
   *
   * @param string $str <p>The string to check (cast to string first).</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII (an empty string counts as ASCII)<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // Nothing to inspect: the empty string is treated as ASCII.
    if ($str === '') {
      return true;
    }

    // Only a positive match for a high byte makes the string non-ASCII.
    return preg_match('/[\x80-\xFF]/', $str) !== 1;
  }
2708
2709
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The string qualifies when strict decoding followed by re-encoding
   * reproduces the input exactly.
   *
   * @param string $str <p>The input string (cast to string first).</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded; an empty string is never base64.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Strict mode: characters outside the base64 alphabet make decoding fail.
    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2730 41
2731
  /**
   * Check if the input is binary... (this is a heuristic and looks like a hack).
   *
   * The input counts as binary when it consists solely of the characters "0" and "1",
   * or when it contains at least one NUL byte.
   *
   * @param mixed $input <p>The value to check; it is cast to string before it is inspected.</p>
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // Same normalization as the other is_*() checks in this class; keeps
    // non-string values away from preg_match() / strpos().
    $input = (string)$input;

    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // A single NUL byte is enough. The former "more than 30% NUL bytes" test
    // could only succeed when this one succeeds too, so it was redundant.
    return strpos($input, "\x00") !== false;
  }
2755 41
2756
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to UTF8::is_binary().
   * If the file cannot be opened or read, an empty block is checked instead.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      // fopen() reports failure by returning false (plus a warning), not by throwing.
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        $data = fread($fp, 512);
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Only reached when an error handler turns warnings into exceptions.
      $block = '';
    }

    // Release the handle on every path, including after a caught exception.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2775
2776
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string; it is compared strictly against each key of self::$bom.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // The BOM byte sequences are the keys of self::$bom.
    return in_array($str, array_keys(self::$bom), true);
  }
2795
2796 5
  /**
   * Check if the string contains any html-tags <lall>.
   *
   * @param string $str <p>The input string (cast to string first).</p>
   *
   * @return boolean <p>true as soon as one opening, closing or self-closing tag is found.</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Matches "<tag>", "</tag>" and "<tag attr=... />" style constructs.
    $found = array();
    preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    return count($found) !== 0;
  }
2822 33
2823
  /**
   * Try to check if "$str" is a json-string.
   *
   * The string qualifies only when UTF8::json_decode() yields an object
   * and json_last_error() reports JSON_ERROR_NONE.
   *
   * @param string $str <p>The input string (cast to string first).</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
2848
2849
  /**
   * Check if the string is UTF-16.
   *
   * After removing a BOM, the string is only examined further if UTF8::is_binary()
   * accepts it. Each byte order is then scored by a UTF-8 round trip, and the
   * byte order with the strictly higher score wins.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16 (or both byte orders score equally),<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    // Score little endian first, then big endian.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // The conversion must survive a round trip unchanged to be scored at all.
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-16LE'] > $scores['UTF-16BE']) {
      return 1;
    }

    if ($scores['UTF-16BE'] > $scores['UTF-16LE']) {
      return 2;
    }

    return false;
  }
2908
2909
  /**
   * Check if the string is UTF-32.
   *
   * After removing a BOM, the string is only examined further if UTF8::is_binary()
   * accepts it. Each byte order is then scored by a UTF-8 round trip, and the
   * byte order with the strictly higher score wins.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32 (or both byte orders score equally),<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    // Score little endian first, then big endian.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // The conversion must survive a round trip unchanged to be scored at all.
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-32LE'] > $scores['UTF-32BE']) {
      return 1;
    }

    if ($scores['UTF-32BE'] > $scores['UTF-32LE']) {
      return 2;
    }

    return false;
  }
2968
2969 24
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked (cast to string first).</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> for an empty string or well-formed UTF-8,<br />
   *              <strong>false</strong> otherwise (including a string that ends in the middle of a
   *              multi-octet sequence).
   *              </p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // The regex shortcut relies on the /u modifier, so it may only be taken
    // when PCRE actually supports UTF-8 (the check used to be inverted).
    if (self::pcre_utf8_support() === true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    // Fallback: octet-by-octet state machine.
    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point accumulated so far
    $mBytes = 1; // total number of octets announced by the lead octet
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Expecting either a US-ASCII character or the lead octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }
      } elseif (0x80 === (0xC0 & $in)) {
        // Legal continuation octet: merge its six payload bits.
        $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

        if (0 === --$mState) {
          // End of the multi-octet sequence: $mUcs4 holds the final code point.
          if (
              // From Unicode 3.1, non-shortest form is illegal
              (2 === $mBytes && $mUcs4 < 0x0080) ||
              (3 === $mBytes && $mUcs4 < 0x0800) ||
              (4 === $mBytes && $mUcs4 < 0x10000) ||
              (4 < $mBytes) ||
              // From Unicode 3.2, surrogate characters are illegal.
              (($mUcs4 & 0xFFFFF800) === 0xD800) ||
              // Code points outside the Unicode range are illegal.
              ($mUcs4 > 0x10FFFF)
          ) {
            return false;
          }

          // reset for the next character
          $mState = 0;
          $mUcs4 = 0;
          $mBytes = 1;
        }
      } else {
        // A continuation octet was expected: incomplete multi-octet sequence.
        return false;
      }
    }

    // A string that stops in the middle of a multi-octet sequence is invalid.
    return $mState === 0;
  }
3113 4
3114
  /**
3115 4
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3116
   * Decodes a JSON string
3117 4
   *
3118
   * @link http://php.net/manual/en/function.json-decode.php
3119
   *
3120
   * @param string $json    <p>
3121
   *                        The <i>json</i> string being decoded.
3122
   *                        </p>
3123
   *                        <p>
3124
   *                        This function only works with UTF-8 encoded strings.
3125
   *                        </p>
3126
   *                        <p>PHP implements a superset of
3127 13
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3128
   *                        only supports these values when they are nested inside an array or an object.
3129 13
   *                        </p>
3130 13
   * @param bool   $assoc   [optional] <p>
3131
   *                        When <b>TRUE</b>, returned objects will be converted into
3132 13
   *                        associative arrays.
3133 1
   *                        </p>
3134 1
   * @param int    $depth   [optional] <p>
3135 1
   *                        User specified recursion depth.
3136
   *                        </p>
3137 13
   * @param int    $options [optional] <p>
3138
   *                        Bitmask of JSON decode options. Currently only
3139
   *                        <b>JSON_BIGINT_AS_STRING</b>
3140
   *                        is supported (default is to cast large integers as floats)
3141
   *                        </p>
3142
   *
3143
   * @return mixed the value encoded in <i>json</i> in appropriate
3144
   * PHP type. Values true, false and
3145
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3146
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3147
   * <i>json</i> cannot be decoded or if the encoded
3148
   * data is deeper than the recursion limit.
3149
   */
3150 18
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input goes through self::filter() before it is decoded.
    $filtered = self::filter($json);

    // $options is only handed to json_decode() when Bootup::is_php('5.4') is true.
    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
3162
3163 1
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value.
   *
   * The value is passed through self::filter() first and is then handed to the native json_encode().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>
   *                       PHP implements a superset of JSON - it will also encode and decode scalar
   *                       types and <b>NULL</b>. The JSON standard only supports these values when
   *                       they are nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       The behaviour of these constants is described on the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded to the native function when Bootup::is_php('5.5') is truthy.
   *                       </p>
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Older PHP versions are called without the "$depth" argument.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3211
3212
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst($str)
  {
    // self::substr() is not guaranteed to return a string (it can yield false),
    // so both parts are normalized to strings before they are used any further.
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
3223
3224
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped. If omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (control / other) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but it is a valid character list,
    // so it must not fall back to the default whitespace stripping.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3247
3248 16
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point than others,
   *                or an empty string if the input contains no code points.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native max() must not be called with an empty array
    // (it triggers a warning and returns false, or throws a ValueError on PHP 8).
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3263
3264
  /**
   * Determines the largest byte width of any single UTF-8 encoded character
   * contained in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte lengths of the given chars (0 if there are no chars).</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // nothing to compare -> no width
    if (count($sizes) <= 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3281
3282
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the internal mbstring encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3297
3298
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string <p>The character with the lowest code point than others,
   *                or an empty string if the input contains no code points.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native min() must not be called with an empty array
    // (it triggers a warning and returns false, or throws a ValueError on PHP 8).
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3313
3314
  /**
   * Deprecated camelCase alias, forwards to "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   *
   * @return string
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3329
3330
  /**
   * Normalize the encoding-"name" input.
   *
   * "UTF-8" and every name listed in self::$iconvEncoding are returned untouched. Everything else is
   * upper-cased and, if its alphanumeric form matches a known alias, replaced by the canonical name.
   * Results are memoized per (original) input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.;
   *                      <strong>false</strong> if the given encoding is empty.</p>
   */
  public static function normalize_encoding($encoding)
  {
    static $resolved = array();

    // alias (upper-case, alphanumeric + whitespace only) => canonical name
    static $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!$encoding) {
      return false;
    }

    // already in its final form
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($resolved[$encoding])) {
      return $resolved[$encoding];
    }

    $normalized = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    if (!empty($aliases[$lookupKey])) {
      $normalized = $aliases[$lookupKey];
    }

    // the cache is keyed by the unmodified input
    $resolved[$encoding] = $normalized;

    return $normalized;
  }
3386
3387
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$utf8MSWord that occurs in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are built once and then re-used for every call
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
3413
3414
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable is replaced by a plain space and, unless requested otherwise,
   * every entry of self::$bidiUniCodeControlsTable is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.
   *                                        Only a strict boolean <strong>true</strong> is honoured.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key must be derived from the same strict check that decides whether the
    // NO-BREAK SPACE is removed from the table. Otherwise a truthy non-boolean (e.g. 1) would
    // store the full table under the "keep" key and poison later calls that pass true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3460
3461 45
  /**
   * Format a number with grouped thousands.
   *
   * Works with multi-byte separators on every PHP version: if one of the separators is longer than
   * one byte, the number is formatted with the single-byte defaults first and the separators are
   * swapped in afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // One multi-byte separator is enough to need the workaround (not both of them).
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array replaces both placeholders in a single pass, so a separator that itself
      // contains "." or "," is not replaced a second time (as it would be with str_replace()).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3498
3499
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * If "IntlChar" support is flagged in self::$support, \IntlChar::ord() is tried first; otherwise
   * (or if it yields a falsy result) the leading bytes are decoded manually and the result is memoized.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // empty input, but "0" is a real character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    // convert foreign encodings into UTF-8 first
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($chr);
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    // 1-based list of the (at most) four leading bytes
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing decodable)
      $codePoint = $lead;
    }

    $cache[$chr] = $codePoint;

    return $codePoint;
  }
3559
3560 1
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: In contrast to "parse_str()", this method does not (re-)place variables in the
   *          current scope; the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    $parsed = \mb_parse_str($input, $result);

    // success requires both: the parser did not fail and something was actually parsed
    return $parsed !== false && !empty($result);
  }
3587
3588 36
  /**
   * Checks if \u modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "u" modifier against an empty string.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3598 36
3599
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: decimal digits are used as-is, hexadecimal digits are
   * converted via self::hex_to_int(), anything else is treated as a character and passed to self::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty / unresolvable.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve the start first, then the end (same rules for both)
    $limits = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $limits[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($limits[0], $limits[1]));
  }
3645
3646 36
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // turn "%uXXXX" escapes into hexadecimal html entities, so the entity decoder can handle them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode again and again until a pass does not change the string anymore
    // (or only once, if multi-decoding is disabled)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3696
3697
  /**
   * Deprecated camelCase alias, forwards to "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3712 6
3713 6
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom is checked against the beginning of the string
   * (presumably the map is "BOM byte sequence => length in bytes" -- confirm at its definition).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // Before PHP 8, substr() returns false if nothing is left after the BOM;
        // the cast guarantees that an empty string is returned in that case.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3730 6
3731
  /**
   * Collapses directly repeated occurrences of a string inside another string into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    $needles = is_string($what) ? array($what) : $what;

    // anything that is neither a string nor an array is ignored
    if (!is_array($needles)) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
3754
3755
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str
   * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00" ... "%1F") of the characters.</p>
   * @param string $replacement <p>The text that is inserted instead of the removed characters.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits of percent-escapes are case-insensitive ("%0B" === "%0b"),
      // so the patterns must match both spellings.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is found anymore, because a removal can produce a new match
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3788
3789 14
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is handed to mbstring as "none"
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // temporarily switch the substitute character, re-encode the string, then restore the old setting
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($search, $replace, $str);
  }
3839
3840
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped. If omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (control / other) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but it is a valid character list,
    // so it must not fall back to the default whitespace stripping.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3863
3864 2
  /**
   * Builds a regex fragment (without delimiters) that matches any single character of the given string.
   *
   * Characters shorter than three bytes are escaped and collected in a bracket expression, longer
   * single characters are appended to it unescaped, and everything else becomes an alternative of a
   * non-capturing group. The result is memoized per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 holds the content of the bracket expression, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a leading "-" is a literal inside of a bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts)) ? $parts[0] : ('(?:' . implode('|', $parts) . ')');

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3912
3913 1
  /**
3914
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
3915 1
   */
3916
  public static function showSupport()
  {
    // make sure the support table has been populated before printing it
    if (isset(self::$support['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // one entry per line, each followed by a newline and an HTML line break
    foreach (self::$support as $supportEntry) {
      echo $supportEntry . "\n<br>";
    }
  }
3926
3927
  /**
3928
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
3929
   *
3930
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
3931
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
3932
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
3933
   *
3934
   * @return string <p>The HTML numbered entity.</p>
3935
   */
3936
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    // nothing to encode
    if ('' === $char) {
      return '';
    }

    // ASCII input is passed through untouched when the caller asked for it
    if (true === $keepAsciiChars && true === self::is_ascii($char)) {
      return $char;
    }

    // only non-default encodings need to be normalized
    $targetEncoding = ('UTF-8' === $encoding) ? $encoding : self::normalize_encoding($encoding);

    return '&#' . self::ord($char, $targetEncoding) . ';';
  }
3959 1
3960
  /**
3961 1
   * Convert a string to an array of Unicode characters.
3962
   *
3963 1
   * @param string  $str       <p>The string to split into array.</p>
3964
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
3965 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3966
   *
3967
   * @return string[] <p>An array containing chunks of the string.</p>
3968
   */
3969
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $chars = array();

    if (true === self::$support['pcre_utf8']) {

      // PCRE can handle UTF-8: let the regex engine cut the string into characters.
      if (true === $cleanUtf8) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // Fallback: scan the bytes and group them by the length announced in the lead byte.
      // Sequences whose continuation bytes do not match are skipped one byte at a time.
      $byteCount = strlen($str);
      $pos = 0;

      while ($pos < $byteCount) {
        $lead = $str[$pos];
        $step = 1;

        if (($lead & "\x80") === "\x00") {
          // single-byte (ASCII) character
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // two-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];
            $step = 2;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // three-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];
            $step = 3;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // four-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];
            $step = 4;
          }
        }

        $pos += $step;
      }
    }

    // group the single characters into chunks of $length characters
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && '' === $chars[0]) {
      return array();
    }

    return $chars;
  }
4046
4047 1
  /**
4048
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4049 1
   *
4050
   * @param string $str <p>The input string.</p>
4051 1
   *
4052
   * @return false|string <p>
4053
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4054
   *                      otherwise it will return false.
4055
   *                      </p>
4056
   */
4057
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //
    // Each detector runs at most once and its result is reused for both byte orders.
    //

    if (self::is_binary($str)) {
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the input from that encoding to itself
    // gives back exactly the same bytes. The strings are compared directly; a failed
    // conversion (false) is treated as an empty string.
    //

    $strAsString = (string)$str;
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $strAsString) === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
4139
4140
  /**
4141
   * Check if the string ends with the given substring.
4142
   *
4143
   * @param string $haystack <p>The string to search in.</p>
4144
   * @param string $needle   <p>The substring to search for.</p>
4145
   *
4146
   * @return bool
4147 1
   */
4148 View Code Duplication
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // take as many trailing characters as the needle has and compare them
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4163
4164 12
  /**
4165
   * Check if the string ends with the given substring, case insensitive.
4166 12
   *
4167 11
   * @param string $haystack <p>The string to search in.</p>
4168 11
   * @param string $needle   <p>The substring to search for.</p>
4169 12
   *
4170
   * @return bool
4171
   */
4172 View Code Duplication
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many trailing characters as the needle has
    $tail = self::substr($haystack, -self::strlen($needle));

    // self::substr() can return false; that is not a match and must not reach strcasecmp()
    if ($tail === false) {
      return false;
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
4187
4188 8
  /**
4189 2
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4190 2
   *
4191
   * @link  http://php.net/manual/en/function.str-ireplace.php
4192 8
   *
4193 8
   * @param mixed $search  <p>
4194 1
   *                       Every replacement with search array is
4195
   *                       performed on the result of previous replacement.
4196
   *                       </p>
4197 7
   * @param mixed $replace <p>
4198
   *                       </p>
4199 7
   * @param mixed $subject <p>
4200
   *                       If subject is an array, then the search and
4201
   *                       replace is performed with every entry of
4202 1
   *                       subject, and the return value is an array as
4203
   *                       well.
4204
   *                       </p>
4205
   * @param int   $count   [optional] <p>
4206
   *                       The number of matched and replaced needles will
4207
   *                       be returned in count which is passed by
4208
   *                       reference.
4209
   *                       </p>
4210
   *
4211
   * @return mixed <p>A string or an array of replacements.</p>
4212
   */
4213
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a case-insensitive, UTF-8 aware literal pattern.
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // An empty needle must never match: "^" preceded by a character is impossible.
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // The replacement is plain text, not a regex replacement template: escape "\" and "$"
    // so that sequences like "$1" or "\1" are inserted literally instead of being
    // interpreted as back-references by preg_replace().
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    $replacedCount = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // hand the number of replacements back through the reference parameter

    return $subject;
  }
4231
4232 2
  /**
4233
   * Check if the string starts with the given substring, case insensitive.
4234 2
   *
4235 2
   * @param string $haystack <p>The string to search in.</p>
4236
   * @param string $needle   <p>The substring to search for.</p>
4237 2
   *
4238 2
   * @return bool
4239 2
   */
4240 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle must be found, ignoring case, at position 0
    return 0 === self::stripos($haystack, $needle);
  }
4255 3
4256 3
  /**
4257
   * Limit the number of characters in a string, but also after the next word.
4258 3
   *
4259
   * @param string $str
4260 3
   * @param int    $length
4261
   * @param string $strAddOn
4262
   *
4263
   * @return string
4264
   */
4265
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough already: return unchanged, without the add-on
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the last character inside the limit is a space: cut right before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // self::substr() may return false; normalise to a string before using it further
    $str = (string)self::substr($str, 0, $length);

    // drop the last (possibly cut-off) word
    $words = explode(' ', $str);
    array_pop($words);
    $new_str = implode(' ', $words);

    if ($new_str === '') {
      // there was no space inside the limit: fall back to a hard cut
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $new_str . $strAddOn;
  }
4296
4297
  /**
4298
   * Pad a UTF-8 string to given length with another string.
4299
   *
4300
   * @param string $str        <p>The input string.</p>
4301
   * @param int    $pad_length <p>The length of return string.</p>
4302
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4303
   * @param int    $pad_type   [optional] <p>
4304
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4305
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4306
   *                           </p>
4307
   *
4308
   * @return string <strong>Returns the padded string</strong>
4309
   */
4310
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    // Padding only happens for a positive integer target length that is not
    // shorter than the input; everything else returns the input unchanged.
    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    // An empty pad string cannot fill anything and would cause a division by zero below.
    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      return $str;
    }

    // number of characters that have to be added
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the larger one;
        // divide first and cast afterwards, so that substr() receives an integer length
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = self::substr($pre, 0, (int)floor($diff / 2));
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4351
4352
  /**
4353
   * Repeat a string.
4354
   *
4355 8
   * @param string $str        <p>
4356
   *                           The string to be repeated.
4357 8
   *                           </p>
4358 2
   * @param int    $multiplier <p>
4359
   *                           Number of times the input string should be
4360
   *                           repeated.
4361 6
   *                           </p>
4362
   *                           <p>
4363
   *                           multiplier has to be greater than or equal to 0.
4364
   *                           If the multiplier is set to 0, the function
4365 6
   *                           will return an empty string.
4366
   *                           </p>
4367
   *
4368
   * @return string <p>The repeated string.</p>
4369
   */
4370
  public static function str_repeat($str, $multiplier)
  {
    // run the input through self::filter() first, then let the native function repeat it
    return str_repeat(self::filter($str), $multiplier);
  }
4376
4377
  /**
4378
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
4379
   *
4380
   * Replace all occurrences of the search string with the replacement string
4381
   *
4382
   * @link http://php.net/manual/en/function.str-replace.php
4383
   *
4384
   * @param mixed $search  <p>
4385
   *                       The value being searched for, otherwise known as the needle.
4386
   *                       An array may be used to designate multiple needles.
4387 62
   *                       </p>
4388
   * @param mixed $replace <p>
4389 62
   *                       The replacement value that replaces found search
4390
   *                       values. An array may be used to designate multiple replacements.
4391 62
   *                       </p>
4392 4
   * @param mixed $subject <p>
4393
   *                       The string or array being searched and replaced on,
4394
   *                       otherwise known as the haystack.
4395
   *                       </p>
4396
   *                       <p>
4397 61
   *                       If subject is an array, then the search and
4398 2
   *                       replace is performed with every entry of
4399 61
   *                       subject, and the return value is an array as
4400 60
   *                       well.
4401 60
   *                       </p>
4402 2
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4403
   *
4404
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4405
   */
4406 61
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function; $count is filled through the reference
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4410
4411 61
  /**
4412 2
   * Replace the first "$search"-term with the "$replace"-term.
4413 2
   *
4414
   * @param string $search
4415 61
   * @param string $replace
4416
   * @param string $subject
4417
   *
4418
   * @return string
4419
   */
4420
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    // search term not found: nothing to replace
    if (false === $firstHit) {
      return $subject;
    }

    // swap exactly the characters covered by the first occurrence
    return self::substr_replace($subject, $replace, $firstHit, self::strlen($search));
  }
4430 1
4431
  /**
4432 1
   * Shuffles all the characters in the string.
4433
   *
4434
   * @param string $str <p>The input string</p>
4435
   *
4436
   * @return string <p>The shuffled string.</p>
4437
   */
4438
  public static function str_shuffle($str)
  {
    // shuffle whole characters, not bytes
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4446
4447
  /**
4448
   * Sort all characters according to code points.
4449 2
   *
4450
   * @param string $str    <p>A UTF-8 string.</p>
4451 2
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4452
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4453
   *
4454
   * @return string <p>String of sorted characters.</p>
4455
   */
4456
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    // flipping twice leaves every value only once
    if ($unique) {
      $codePoints = array_flip(array_flip($codePoints));
    }

    // sort by value, descending or ascending
    $desc ? arsort($codePoints) : asort($codePoints);

    // turn the sorted code points back into a string
    return self::string($codePoints);
  }
4472
4473
  /**
4474
   * Split a string into an array.
4475
   *
4476
   * @param string $str
4477
   * @param int    $len
4478
   *
4479
   * @return array
4480
   */
4481
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    // invalid chunk lengths are handed to the native function
    if ($len < 1) {
      return str_split($str, $len);
    }

    // split the string with the grapheme-cluster pattern
    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if (1 === $len) {
      return $clusters;
    }

    // glue $len consecutive clusters together into one chunk
    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if (0 === $position % $len) {
        // first cluster of a new chunk
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4517
4518
  /**
4519
   * Check if the string starts with the given substring.
4520
   *
4521
   * @param string $haystack <p>The string to search in.</p>
4522
   * @param string $needle   <p>The substring to search for.</p>
4523
   *
4524
   * @return bool
4525
   */
4526 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle must be found at position 0
    return 0 === self::strpos($haystack, $needle);
  }
4541 2
4542
  /**
4543
   * Get a binary representation of a specific string.
4544
   *
4545 14
   * @param string $str <p>The input string.</p>
4546
   *
4547
   * @return string
4548
   */
4549 14
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Convert byte by byte. Converting the whole string at once via base_convert()
    // goes through a float and loses precision as soon as the input grows.
    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // Leading zero bits are not part of the result and an all-zero / empty input
    // yields "0", matching what base_convert() returned for short inputs.
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4557 2
4558 2
  /**
4559
   * Convert a string into an array of words.
4560 14
   *
4561
   * @param string $str
4562
   * @param string $charlist
4563
   *
4564
   * @return array
4565
   */
4566 14
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ('' === $str) {
      return array('');
    }

    // regex fragment for one word character: letters (\pL) plus the extra chars from $charlist
    $wordChar = self::rxClass($charlist, '\pL');

    // The capture group makes preg_split() keep the words in the result,
    // so the returned array alternates between non-word parts and words.
    return \preg_split("/({$wordChar}+(?:[\p{Pd}’']{$wordChar}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4578
4579
  /**
4580
   * alias for "UTF8::to_ascii()"
4581
   *
4582
   * @see UTF8::to_ascii()
4583
   *
4584
   * @param string $str
4585
   * @param string $unknown
4586
   * @param bool   $strict
4587
   *
4588
   * @return string
4589
   */
4590
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4594
4595
  /**
4596
   * Counts number of words in the UTF-8 string.
4597
   *
4598
   * @param string $str      <p>The input string.</p>
4599
   * @param int    $format   [optional] <p>
4600
   *                         <strong>0</strong> => return a number of words (default)<br />
4601
   *                         <strong>1</strong> => return an array of words<br />
4602
   *                         <strong>2</strong> => return an array of words with word-offset as key
4603
   *                         </p>
4604
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
4605
   *
4606
   * @return array|int <p>The number of words in the string</p>
4607
   */
4608
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // The parts alternate: even indexes hold the text between words, odd indexes hold the words.
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    // format 1: plain list of words
    if (1 === $format) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // format 2: words keyed by their character offset in the input
    if (2 === $format) {
      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    // default: only the number of words
    return ($partCount - 1) / 2;
  }
4638
4639
  /**
4640
   * Case-insensitive string comparison.
4641
   *
4642
   * INFO: Case-insensitive version of UTF8::strcmp()
4643 4
   *
4644
   * @param string $str1
4645 4
   * @param string $str2
4646
   *
4647 4
   * @return int <p>
4648 2
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4649
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4650
   *             <strong>0</strong> if they are equal.
4651 3
   *             </p>
4652
   */
4653
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both sides, then compare them with the case-sensitive comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4657
4658
  /**
4659
   * alias for "UTF8::strstr()"
4660
   *
4661
   * @see UTF8::strstr()
4662
   *
4663
   * @param string  $haystack
4664
   * @param string  $needle
4665
   * @param bool    $before_needle
4666
   * @param string  $encoding
4667
   * @param boolean $cleanUtf8
4668
   *
4669
   * @return string|false
4670
   */
4671
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4675
4676
  /**
4677 1
   * Case-sensitive string comparison.
4678
   *
4679 1
   * @param string $str1
4680 1
   * @param string $str2
4681 1
   *
4682
   * @return int  <p>
4683 1
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
4684
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
4685
   *              <strong>0</strong> if they are equal.
4686
   *              </p>
4687
   */
4688
  public static function strcmp($str1, $str2)
  {
    $str1 = (string)$str1;
    $str2 = (string)$str2;

    // byte-identical strings need no normalization
    if ($str1 === $str2) {
      return 0;
    }

    // Compare the canonically decomposed (NFD) forms.
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() returns false for input it cannot handle; fall back to the raw
    // string so that two different invalid strings are not reported as equal
    if ($normalized1 === false) {
      $normalized1 = $str1;
    }
    if ($normalized2 === false) {
      $normalized2 = $str2;
    }

    return strcmp($normalized1, $normalized2);
  }
4696
4697
  /**
4698
   * Find length of initial segment not matching mask.
4699
   *
4700
   * @param string $str
4701
   * @param string $charList
4702
   * @param int    $offset
4703
   * @param int    $length
4704
   *
4705
   * @return int|null
4706
   */
4707 1
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;

    // without a mask there is nothing to measure against
    if ('' === $charList) {
      return null;
    }

    // restrict the search to the requested window, if one was given
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ('' === $str) {
      return null;
    }

    // lazily capture everything in front of the first character from the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no mask character found: the whole string counts
    return self::strlen($str);
  }
4729 11
4730
  /**
4731 11
   * alias for "UTF8::stristr()"
4732
   *
4733 11
   * @see UTF8::stristr()
4734 2
   *
4735 2
   * @param string  $haystack
4736
   * @param string  $needle
4737 11
   * @param bool    $before_needle
4738
   * @param string  $encoding
4739 11
   * @param boolean $cleanUtf8
4740 2
   *
4741
   * @return string|false
4742
   */
4743
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4747
4748
  /**
   * Build a UTF-8 string out of a list of code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    // convert every code point via UTF8::chr() and append it in order
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4770
4771
  /**
   * Tell whether the string begins with one of the known "BOM" (Byte Order Mark) sequences.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // the BOM byte sequences are the keys of self::$bom
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4788
4789
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are always stripped; this can not be
   *                                changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    // run the input through UTF8::clean() only when requested
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4817 10
4818
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string   $haystack  <p>
   *                            The string from which to get the position of the first occurrence
   *                            of needle
   *                            </p>
   * @param string   $needle    <p>
   *                            The string to find in haystack
   *                            </p>
   * @param int|null $offset    [optional] <p>
   *                            The position in haystack to start searching;
   *                            <strong>null</strong> (the default) is treated as 0.
   *                            </p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the default stays "null" for backward compatibility, but the functions
    // called below expect an integer offset
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4885 11
4886 11
  /**
   * Case-insensitive search: returns the part of haystack from the first occurrence of needle to the end
   * (or the part before it).
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle is returned (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found
   *                      (or if haystack / needle is empty).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding);
    }

    if (true === $cleanUtf8) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only mbstring accepts an encoding argument here, so warn if it is missing
    if ('UTF-8' !== $encoding && false === self::$support['mbstring']) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (true === self::$support['mbstring']) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (true === self::$support['intl']) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // regex fallback: lazily capture everything in front of the first match
    $match = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $before = $match[1];

    return $before_needle ? $before : self::substr($haystack, self::strlen($before));
  }
4952 3
4953
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding);

    // for these encodings the byte count is returned directly
    // (loose comparison kept on purpose, it mirrors a "switch" statement)
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    if (true === $cleanUtf8) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isForeignEncoding = $encoding !== 'UTF-8';

    if (
        $isForeignEncoding
        &&
        false === self::$support['mbstring']
        &&
        false === self::$support['iconv']
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // without mbstring, iconv is tried first for a non UTF-8 encoding
    if (
        $isForeignEncoding
        &&
        true === self::$support['iconv']
        &&
        false === self::$support['mbstring']
    ) {
      $length = \iconv_strlen($str, $encoding);
      if (false !== $length) {
        return $length;
      }
    }

    if (true === self::$support['mbstring']) {
      return \mb_strlen($str, $encoding);
    }

    if (true === self::$support['intl']) {
      $str = self::clean($str);
      $length = \grapheme_strlen($str);
      if (null !== $length) {
        return $length;
      }
    }

    if (true === self::$support['iconv']) {
      $length = \iconv_strlen($str, $encoding);
      if (false !== $length) {
        return $length;
      }
    }

    // fallback via vanilla php: count the single-character regex matches
    $parts = array();
    preg_match_all('/./us', $str, $parts);
    $length = count($parts[0]);
    if (0 !== $length) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
5049
5050
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands, then delegate to the case sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5066
5067
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // shortcut: identical strings need no folding at all
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5085 20
5086 20
  /**
   * Case-insensitive comparison of the first n characters of two strings.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both operands, then delegate to the case sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5103
5104 43
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() may not return a string (e.g. false), so cast explicitly
    // before handing the prefixes to UTF8::strcmp()
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5124
5125
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      The string starting from the first character found,<br />
   *                      or false if none is found (or if haystack / char_list is empty).
   *                      </p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $hit = array();
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    $start = strpos($haystack, $hit[0]);

    return substr($haystack, $start);
  }
5150
5151
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>
   *                              The string to search for. A non-negative integer is converted
   *                              to the corresponding character via UTF8::chr().
   *                              </p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle; this check has to run
    // before the string cast below, otherwise it can never be true
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn only if neither of the encoding-aware extensions is available
    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // fallback via vanilla php: search byte-wise in the sliced haystack and
    // convert the byte position back into a character position

    $haystack = (string)self::substr($haystack, $offset);

    // NOTE(review): for a negative $offset the result is relative to the
    // sliced haystack, not to the original one -- confirm this is intended
    if ($offset < 0) {
      $offset = 0;
    }

    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5272
5273
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding);
    }

    if (true === $cleanUtf8) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5312 1
5313
  /**
   * Reverses the order of the characters in the string.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split into characters, flip the list and glue it back together
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5330
5331
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end.
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding);
    }

    if (true === $cleanUtf8) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5369
5370 1
  /**
5371
   * Find position of last occurrence of a case-insensitive string.
5372
   *
5373
   * @param string  $haystack  <p>The string to look in.</p>
5374
   * @param string  $needle    <p>The string to look for.</p>
5375
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5376
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5377
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5378
   *
5379
   * @return int|false <p>
5380
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
5381
   *                   not found, it returns false.
5382
   *                   </p>
5383
   */
5384
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is treated as a code point and converted to its character.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // Nothing can be found in an empty haystack or with an empty needle.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // Make sure the extension-support table has been populated before reading it.
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strripos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    //
    // Fold the case of both strings before delegating to the case-sensitive strrpos().
    // The previous fallback used strtonatfold(), which only strips combining marks and
    // leaves letter case untouched, so the search was not case-insensitive at all.
    //
    // NOTE(review): full case folding may change the character count of some inputs,
    // which would shift the reported position -- confirm this is acceptable.
    return self::strrpos(self::strtocasefold($haystack), self::strtocasefold($needle), $offset, $encoding, $cleanUtf8);
  }
5447
5448 1
  /**
5449 1
   * Find position of last occurrence of a string in a string.
5450 1
   *
5451
   * @link http://php.net/manual/en/function.mb-strrpos.php
5452 1
   *
5453
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5454
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
5455
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5456
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5457
   *                              the end of the string.
5458
   *                              </p>
5459
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5460
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5461
   *
5462
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5463
   *                   is not found, it returns false.</p>
5464
   */
5465
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is interpreted as a code point.
    $needleIsCodePoint = (int)$needle === $needle && $needle >= 0;
    if ($needleIsCodePoint) {
      $needle = (string)self::chr($needle);
    }

    // Normalize the argument types.
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // Empty haystack or empty needle: there is nothing to find.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: a boolean $encoding is only a fallback for old versions and means "UTF-8"
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding);
    }

    // Populate the extension-support table on first use.
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // First choice: mbstring. A miss (false) falls through to the next strategy.
    if (self::$support['mbstring'] === true) {
      $found = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($found !== false) {
        return $found;
      }
    }

    // Second choice: intl's grapheme functions, again falling through on a miss.
    if (self::$support['intl'] === true) {
      $found = \grapheme_strrpos($haystack, $needle, $offset);
      if ($found !== false) {
        return $found;
      }
    }

    // fallback via vanilla php: narrow the haystack according to $offset first
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    // Byte-wise search, then measure the prefix with self::strlen() to turn the
    // byte position into a character position.
    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    $prefix = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($prefix);
  }
5542 1
5543
  /**
5544
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5545
   * mask.
5546
   *
5547
   * @param string $str    <p>The input string.</p>
5548
   * @param string $mask   <p>The mask of chars</p>
5549
   * @param int    $offset [optional]
5550
   * @param int    $length [optional]
5551
   *
5552
   * @return int
5553
   */
5554
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // Normalize the window arguments.
    $offset = (int)$offset;
    $length = (int)$length;

    // Only cut the string when a window other than "everything" was requested.
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // Match the leading run of mask characters and report its length via self::strlen().
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $leadingRun)) {
      return 0;
    }

    return self::strlen($leadingRun[0]);
  }
5571 2
5572
  /**
5573
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5574 2
   *
5575 2
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5576
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5577 2
   * @param bool    $before_needle [optional] <p>
5578
   *                               If <b>TRUE</b>, strstr() returns the part of the
5579
   *                               haystack before the first occurrence of the needle (excluding the needle).
5580 20
   *                               </p>
5581
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5582 20
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5583 4
   *
5584
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5585
   */
5586 19
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Normalize the argument types.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Empty haystack or empty needle: nothing can match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // On request, sanitize both inputs before searching.
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    // Populate the extension-support table on first use.
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // First choice: mbstring. A miss (false) falls through to the next strategy.
    if (self::$support['mbstring'] === true) {
      $portion = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($portion !== false) {
        return $portion;
      }
    }

    // Second choice: intl's grapheme functions, again falling through on a miss.
    if (self::$support['intl'] === true) {
      $portion = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($portion !== false) {
        return $portion;
      }
    }

    // Last resort: lazily capture everything in front of the first literal needle.
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    // Either hand back the captured prefix, or skip past it and return the rest.
    return $before_needle
        ? $match[1]
        : self::substr($haystack, self::strlen($match[1]));
  }
5644 3
5645 3
  /**
5646
   * Unicode transformation for case-less matching.
5647
   *
5648 4
   * @link http://unicode.org/reports/tr21/tr21-5.html
5649 16
   *
5650
   * @param string  $str       <p>The input string.</p>
5651 19
   * @param bool    $full      [optional] <p>
5652
   *                           <b>true</b>, replace full case folding chars (default)<br />
5653
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5654 19
   *                           </p>
5655 19
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5656
   *
5657 3
   * @return string
5658 19
   */
5659
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // Per-request caches so the lookup tables are only built once.
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Split the static fold map into parallel search/replace lists on first use.
    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // The full table is loaded lazily through getData().
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Finish by lowercasing whatever the replacement tables did not cover.
    return self::strtolower($str);
  }
5696 22
5697 6
  /**
5698
   * Make a string lowercase.
5699
   *
5700 16
   * @link http://php.net/manual/en/function.mb-strtolower.php
5701
   *
5702
   * @param string  $str       <p>The string being lowercased.</p>
5703
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5704
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5705
   *
5706
   * @return string str with all alphabetic characters converted to lowercase.
5707
   */
5708 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // An empty input needs no conversion.
    if ($str === '') {
      return '';
    }

    // On request, sanitize the input before it reaches the multibyte function.
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Anything other than the literal "UTF-8" is run through the encoding normalizer.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtolower($str, $charset);
  }
5729
5730 1
  /**
5731
   * Generic case sensitive transformation for collation matching.
5732
   *
5733
   * @param string $str <p>The input string</p>
5734
   *
5735
   * @return string
5736
   */
5737
  private static function strtonatfold($str)
  {
    // Decompose to NFD so that combining marks become separate code points ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... then drop every run of non-spacing marks. Letter case is left untouched.
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5742
5743
  /**
5744 8
   * Make a string uppercase.
5745
   *
5746 8
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5747 2
   *
5748
   * @param string  $str       <p>The string being uppercased.</p>
5749
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5750 7
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5751 7
   *
5752 7
   * @return string str with all alphabetic characters converted to uppercase.
5753
   */
5754 7 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // An empty input needs no conversion.
    if ($str === '') {
      return '';
    }

    // On request, sanitize the input before it reaches the multibyte function.
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Anything other than the literal "UTF-8" is run through the encoding normalizer.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtoupper($str, $charset);
  }
5774 7
5775 7
  /**
5776 7
   * Translate characters or replace sub-strings.
5777
   *
5778 7
   * @link  http://php.net/manual/en/function.strtr.php
5779 7
   *
5780
   * @param string          $str  <p>The string being translated.</p>
5781 7
   * @param string|string[] $from <p>The string replacing from.</p>
5782
   * @param string|string[] $to   <p>The string being translated to.</p>
5783
   *
5784
   * @return string <p>
5785
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5786
   *                corresponding character in to.
5787
   *                </p>
5788
   */
5789
  public static function strtr($str, $from, $to = INF)
  {
    // Three-argument form: build a "from => to" replacement map.
    // When $to is omitted (INF sentinel), $from is handed to the native strtr() as-is
    // and is therefore expected to already be a replacement-pairs array.
    if (INF !== $to) {
      // The documented contract allows string|string[] for both arguments. Arrays are
      // taken as ready-made lists; only strings are split by self::str_split(), which
      // only accepts strings.
      $from = is_array($from) ? array_values($from) : self::str_split($from);
      $to = is_array($to) ? array_values($to) : self::str_split($to);

      // Both lists must have the same length for array_combine(): trim the longer one.
      $pairCount = min(count($from), count($to));
      $from = array_slice($from, 0, $pairCount);
      $to = array_slice($to, 0, $pairCount);

      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5808
5809 1
  /**
5810
   * Return the width of a string.
5811 1
   *
5812
   * @param string  $str       <p>The input string.</p>
5813 1
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5814 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5815 1
   *
5816 1
   * @return int
5817
   */
5818 1
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Anything other than the literal "UTF-8" is run through the encoding normalizer.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    // iconv and mbstring are not tolerant to invalid encoding, so sanitize on request
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($input, $charset);
  }
5833
5834
  /**
5835
   * Get part of a string.
5836
   *
5837
   * @link http://php.net/manual/en/function.mb-substr.php
5838
   *
5839
   * @param string  $str       <p>The string being checked.</p>
5840
   * @param int     $start     <p>The first position used in str.</p>
5841
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
5842
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5843
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5844
   *
5845
   * @return string <p>Returns a sub-string specified by the start and length parameters.</p>
5846
   */
5847
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // An empty input has no sub-strings.
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // The character count is only needed for the range check or as the default length.
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    // A start position beyond the end of the string yields false.
    if ($start && $start > $charCount) {
      return false;
    }

    $length = $length === null ? $charCount : (int)$length;

    if ($encoding !== 'UTF-8') {
      // INFO: a boolean $encoding is only a fallback for old versions and means "UTF-8"
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding);
    }

    // Populate the extension-support table on first use.
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // First choice: mbstring.
    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // Second choice: iconv, but "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    // Third choice: intl's grapheme functions.
    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php: split into a character list, slice the requested
    // window out of it and glue the pieces back together
    $chars = self::split($str);

    return implode('', array_slice($chars, $start, $length));
  }
5923
5924
  /**
5925
   * Binary safe comparison of two strings from an offset, up to length characters.
5926
   *
5927
   * @param string  $main_str           <p>The main string being compared.</p>
5928
   * @param string  $str                <p>The secondary string being compared.</p>
5929
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5930
   *                                    the end of the string.</p>
5931
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5932
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5933
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5934
   *                                    insensitive.</p>
5935
   *
5936
   * @return int
5937
   */
5938
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
5939
  {
5940
    $main_str = self::substr($main_str, $offset, $length);
5941
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5940 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5942
5943
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5940 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5941 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5940 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5941 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5944
  }
5945
5946
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If omitted (null), the search runs to the end of the
   *                           haystack.
   *                           </p>
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      // remember whether the caller really passed a length, before the cast
      // below turns "null" into "0"
      $lengthIsGiven = ($length !== null);

      $offset = (int)$offset;
      $length = (int)$length;

      if (
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // An omitted length means "up to the end of the haystack", not "zero
      // characters"; use the same upper bound as the default of
      // "substr_compare()". A false result from "substr()" is normalised to
      // an empty string, which simply yields a count of 0.
      $haystack = (string)self::substr(
          $haystack,
          $offset,
          $lengthIsGiven ? $length : 2147483647,
          $encoding
      );
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the literal (quoted) needle via PCRE
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6020
6021
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with the
   *                needle.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // "self::substr()" is reported to possibly return false; cast it so
      // that this method always returns a string as documented
      return (string)self::substr($haystack, self::strlen($needle));
    }

    return $haystack;
  }
6049
6050
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with the
   *                needle.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) === true) {
      // keep everything except the trailing "strlen($needle)" characters;
      // "self::substr()" is reported to possibly return false, so cast it to
      // always return a string as documented
      return (string)self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
    }

    return $haystack;
  }
6078
6079
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with the
   *                needle.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) === true) {
      // "self::substr()" is reported to possibly return false; cast it so
      // that this method always returns a string as documented
      return (string)self::substr($haystack, self::strlen($needle));
    }

    return $haystack;
  }
6107 5
6108
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|void  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // Nothing to replace. Without this guard the scalar arguments below
      // would still be padded to one element and "array_map()" would invoke
      // the callback once, producing a bogus one-element result.
      if ($num === 0) {
        return array();
      }

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: non-integer entries fall back to offset 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        // not given: hand "null" to every recursive call, so that each string
        // is replaced up to its own end (same as the non-array code path)
        $length = array_fill(0, $num, null);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single string can only take a single replacement: use the first one
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into arrays of UTF-8 characters, so that
    // "array_splice()" works on characters instead of bytes
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6205
6206 10
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with the
   *                needle.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) === true) {
      // keep everything except the trailing "strlen($needle)" characters;
      // "self::substr()" is reported to possibly return false, so cast it to
      // always return a string as documented
      return (string)self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
    }

    return $haystack;
  }
6233 8
6234 8
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character that is already upper-case is lower-cased,
   * every other non-whitespace character is upper-cased.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the unicode-aware regex runs
      $str = self::clean($str);
    }

    // A character that equals its own upper-case form is treated as
    // upper-case and gets lowered; anything else gets raised.
    $flipCase = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $flipCase, $str);
  }
6277
6278
  /**
   * Alias for "UTF8::to_ascii()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Forwarded as the "unknown character" replacement.</p>
   * @param bool   $strict    <p>Forwarded as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>Use "UTF8::to_ascii()" instead.</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6295
6296
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str
   *
   * @return string|string[]
   *
   * @deprecated <p>Use "UTF8::to_iso8859()" instead.</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6311
6312
  /**
   * Alias for "UTF8::to_latin1()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>Use "UTF8::to_latin1()" instead.</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6327
6328
  /**
   * Alias for "UTF8::to_utf8()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>Use "UTF8::to_utf8()" instead.</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6343
6344
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; if it is not pure ASCII afterwards, every
   * non-ASCII character is decoded to its code point and looked up in a
   * per-"bank" (code point >> 8) transliteration table that is loaded lazily
   * via "self::getData()" and cached for the lifetime of the request.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If $strict is true, but Intl is not supported or PHP < 5.4 is used.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the already loaded transliteration tables, indexed by bank
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // Reset the code point for every character; otherwise the value of the
      // previous character would survive and defeat the "!isset($ord)" guard
      // further down.
      $ord = null;

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = ord($c[1]);

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      if ($ordC0 == 254 || $ordC0 == 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be decoded from the lead byte
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      // the high bits select the lookup table ("bank") ...
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // ... and the low 8 bits select the entry within that table
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
6499
6500
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keeping their keys;
   * everything else is cast to string and passed through "self::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (!is_array($str)) {
      $str = (string)$str;

      return $str === '' ? '' : self::utf8_decode($str);
    }

    $converted = array();
    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_iso8859($value);
    }

    return $converted;
  }
6529
6530
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
6543
6544
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // plain ASCII byte - it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xc0) {
        // looks like the lead byte of a UTF-8 sequence: determine how many
        // continuation bytes (10xxxxxx) have to follow
        if ($ordC1 <= 0xdf) {
          $tailLength = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xef) {
          $tailLength = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xf7) {
          $tailLength = 3; // looks like 4 bytes UTF8
        } else {
          $tailLength = 0; // doesn't look like UTF8, but should be converted
        }

        $sequence = $c1;
        $isUtf8Already = ($tailLength > 0);
        for ($j = 1; $j <= $tailLength; $j++) {
          // a missing byte (end of string) or a non-continuation byte means
          // that this is not a valid UTF-8 sequence
          if ($i + $j >= $max || (ord($str[$i + $j]) & 0xc0) !== 0x80) {
            $isUtf8Already = false;
            break;
          }

          $sequence .= $str[$i + $j];
        }

        if ($isUtf8Already === true) {
          // yeah, almost sure it's UTF8 already: copy the sequence and skip
          // its continuation bytes
          $buf .= $sequence;
          $i += $tailLength;
          continue;
        }
      } elseif (isset(self::$win1252ToUtf8[$ordC1])) {
        // stray byte 0x80-0xbf that is found in the Windows-1252 special cases
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // Not valid UTF8 - convert the single byte (treated as ISO-8859-1) into
      // its 2 bytes UTF8 form. Integer shifts are used instead of a division,
      // so that no float is handed to "chr()".
      $buf .= chr(($ordC1 >> 6) | 0xc0) . chr(($ordC1 & 0x3f) | 0x80);
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6661
6662
  /**
   * Strip whitespace (or other characters) from both ends of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but
   * checking for ASCII first would cost more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped. When omitted (INF) or empty,
   *                      Unicode separator (\pZ) and "other" (\pC) characters are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // A caller-supplied, non-empty character list is delegated to the
    // one-sided helpers: left side first, then right side.
    if ($chars !== INF && $chars) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6690
6691
  /**
   * Make the first character of a string uppercase.
   *
   * The string is cut into its first character and the remainder with "UTF8::substr()";
   * only the first character is passed through "UTF8::strtoupper()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $upperFirstChar = self::strtoupper(
        self::substr($str, 0, 1, $encoding, $cleanUtf8),
        $encoding,
        $cleanUtf8
    );

    // A null length means "everything from offset 1 to the end".
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    return $upperFirstChar . $remainder;
  }
6704
6705
  /**
   * Alias for "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The word whose first character is uppercased.</p>
   * @param string  $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string <p>Whatever "UTF8::ucfirst()" returns for the given arguments.</p>
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6720
6721
  /**
   * Uppercase the first character of every word in the string.
   *
   * The string is split with "UTF8::str_to_words()", every non-empty part that is not
   * listed in $exceptions is passed through "UTF8::ucfirst()", and the parts are joined
   * back together without any glue.
   *
   * NOTE(review): this relies on "UTF8::str_to_words()" returning the separators between
   * the words as parts of their own, otherwise they would be lost - confirm against that method.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly, case-sensitive).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The string with uppercased words, or an empty string for empty input.</p>
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Only a really empty string short-circuits: "0" is falsy in PHP, but it is
    // valid input and must not be turned into an empty string.
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {

      // Skip empty parts, but keep a part that is exactly "0": a plain
      // truthiness check would silently drop it from the result.
      if (!$word && $word !== '0') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6770
6771
  /**
   * Decode url-encoded text and html entities (repeatedly, if requested) & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a pass no longer changes the string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Rewrite "%uXXXX" escapes as hexadecimal html entities, so that the
    // entity decoding below can resolve them.
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced when the running PHP version is reported as >= 5.4.
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

      // a single pass is enough unless multi-decoding was requested
    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6821
6822
  /**
6823
   * Return an array with "urlencoded"-win1252 -> UTF-8
6824
   *
6825
   * @deprecated use the "UTF8::urldecode()" function to decode a string
6826
   *
6827
   * @return array
6828
   */
6829
  public static function urldecode_fix_win1252_chars()
6830
  {
6831
    static $array = array(
6832
        '%20' => ' ',
6833
        '%21' => '!',
6834
        '%22' => '"',
6835
        '%23' => '#',
6836
        '%24' => '$',
6837
        '%25' => '%',
6838
        '%26' => '&',
6839
        '%27' => "'",
6840
        '%28' => '(',
6841
        '%29' => ')',
6842
        '%2A' => '*',
6843
        '%2B' => '+',
6844
        '%2C' => ',',
6845
        '%2D' => '-',
6846
        '%2E' => '.',
6847
        '%2F' => '/',
6848
        '%30' => '0',
6849
        '%31' => '1',
6850
        '%32' => '2',
6851
        '%33' => '3',
6852
        '%34' => '4',
6853
        '%35' => '5',
6854
        '%36' => '6',
6855
        '%37' => '7',
6856
        '%38' => '8',
6857
        '%39' => '9',
6858
        '%3A' => ':',
6859
        '%3B' => ';',
6860
        '%3C' => '<',
6861
        '%3D' => '=',
6862
        '%3E' => '>',
6863
        '%3F' => '?',
6864
        '%40' => '@',
6865
        '%41' => 'A',
6866
        '%42' => 'B',
6867
        '%43' => 'C',
6868
        '%44' => 'D',
6869
        '%45' => 'E',
6870
        '%46' => 'F',
6871
        '%47' => 'G',
6872
        '%48' => 'H',
6873
        '%49' => 'I',
6874
        '%4A' => 'J',
6875
        '%4B' => 'K',
6876
        '%4C' => 'L',
6877
        '%4D' => 'M',
6878
        '%4E' => 'N',
6879
        '%4F' => 'O',
6880
        '%50' => 'P',
6881
        '%51' => 'Q',
6882
        '%52' => 'R',
6883
        '%53' => 'S',
6884
        '%54' => 'T',
6885
        '%55' => 'U',
6886
        '%56' => 'V',
6887
        '%57' => 'W',
6888
        '%58' => 'X',
6889
        '%59' => 'Y',
6890
        '%5A' => 'Z',
6891
        '%5B' => '[',
6892
        '%5C' => '\\',
6893
        '%5D' => ']',
6894
        '%5E' => '^',
6895
        '%5F' => '_',
6896
        '%60' => '`',
6897
        '%61' => 'a',
6898
        '%62' => 'b',
6899
        '%63' => 'c',
6900
        '%64' => 'd',
6901
        '%65' => 'e',
6902
        '%66' => 'f',
6903
        '%67' => 'g',
6904
        '%68' => 'h',
6905
        '%69' => 'i',
6906
        '%6A' => 'j',
6907
        '%6B' => 'k',
6908
        '%6C' => 'l',
6909
        '%6D' => 'm',
6910
        '%6E' => 'n',
6911
        '%6F' => 'o',
6912
        '%70' => 'p',
6913
        '%71' => 'q',
6914
        '%72' => 'r',
6915
        '%73' => 's',
6916
        '%74' => 't',
6917
        '%75' => 'u',
6918
        '%76' => 'v',
6919
        '%77' => 'w',
6920
        '%78' => 'x',
6921
        '%79' => 'y',
6922
        '%7A' => 'z',
6923
        '%7B' => '{',
6924
        '%7C' => '|',
6925
        '%7D' => '}',
6926
        '%7E' => '~',
6927
        '%7F' => '',
6928
        '%80' => '`',
6929
        '%81' => '',
6930
        '%82' => '‚',
6931
        '%83' => 'ƒ',
6932
        '%84' => '„',
6933
        '%85' => '…',
6934
        '%86' => '†',
6935
        '%87' => '‡',
6936
        '%88' => 'ˆ',
6937
        '%89' => '‰',
6938
        '%8A' => 'Š',
6939
        '%8B' => '‹',
6940
        '%8C' => 'Œ',
6941
        '%8D' => '',
6942
        '%8E' => 'Ž',
6943
        '%8F' => '',
6944
        '%90' => '',
6945
        '%91' => '‘',
6946
        '%92' => '’',
6947
        '%93' => '“',
6948
        '%94' => '”',
6949
        '%95' => '•',
6950
        '%96' => '–',
6951
        '%97' => '—',
6952
        '%98' => '˜',
6953
        '%99' => '™',
6954
        '%9A' => 'š',
6955
        '%9B' => '›',
6956
        '%9C' => 'œ',
6957
        '%9D' => '',
6958
        '%9E' => 'ž',
6959
        '%9F' => 'Ÿ',
6960
        '%A0' => '',
6961
        '%A1' => '¡',
6962
        '%A2' => '¢',
6963
        '%A3' => '£',
6964
        '%A4' => '¤',
6965
        '%A5' => '¥',
6966
        '%A6' => '¦',
6967
        '%A7' => '§',
6968
        '%A8' => '¨',
6969
        '%A9' => '©',
6970
        '%AA' => 'ª',
6971
        '%AB' => '«',
6972
        '%AC' => '¬',
6973
        '%AD' => '',
6974
        '%AE' => '®',
6975
        '%AF' => '¯',
6976
        '%B0' => '°',
6977
        '%B1' => '±',
6978
        '%B2' => '²',
6979
        '%B3' => '³',
6980
        '%B4' => '´',
6981
        '%B5' => 'µ',
6982
        '%B6' => '¶',
6983
        '%B7' => '·',
6984
        '%B8' => '¸',
6985
        '%B9' => '¹',
6986
        '%BA' => 'º',
6987
        '%BB' => '»',
6988
        '%BC' => '¼',
6989
        '%BD' => '½',
6990
        '%BE' => '¾',
6991
        '%BF' => '¿',
6992
        '%C0' => 'À',
6993
        '%C1' => 'Á',
6994
        '%C2' => 'Â',
6995
        '%C3' => 'Ã',
6996
        '%C4' => 'Ä',
6997
        '%C5' => 'Å',
6998
        '%C6' => 'Æ',
6999
        '%C7' => 'Ç',
7000
        '%C8' => 'È',
7001
        '%C9' => 'É',
7002
        '%CA' => 'Ê',
7003
        '%CB' => 'Ë',
7004
        '%CC' => 'Ì',
7005
        '%CD' => 'Í',
7006
        '%CE' => 'Î',
7007
        '%CF' => 'Ï',
7008
        '%D0' => 'Ð',
7009
        '%D1' => 'Ñ',
7010
        '%D2' => 'Ò',
7011
        '%D3' => 'Ó',
7012
        '%D4' => 'Ô',
7013
        '%D5' => 'Õ',
7014
        '%D6' => 'Ö',
7015
        '%D7' => '×',
7016
        '%D8' => 'Ø',
7017
        '%D9' => 'Ù',
7018
        '%DA' => 'Ú',
7019
        '%DB' => 'Û',
7020
        '%DC' => 'Ü',
7021
        '%DD' => 'Ý',
7022
        '%DE' => 'Þ',
7023
        '%DF' => 'ß',
7024
        '%E0' => 'à',
7025
        '%E1' => 'á',
7026
        '%E2' => 'â',
7027
        '%E3' => 'ã',
7028
        '%E4' => 'ä',
7029
        '%E5' => 'å',
7030
        '%E6' => 'æ',
7031
        '%E7' => 'ç',
7032
        '%E8' => 'è',
7033
        '%E9' => 'é',
7034
        '%EA' => 'ê',
7035
        '%EB' => 'ë',
7036
        '%EC' => 'ì',
7037
        '%ED' => 'í',
7038
        '%EE' => 'î',
7039
        '%EF' => 'ï',
7040
        '%F0' => 'ð',
7041
        '%F1' => 'ñ',
7042
        '%F2' => 'ò',
7043
        '%F3' => 'ó',
7044
        '%F4' => 'ô',
7045
        '%F5' => 'õ',
7046
        '%F6' => 'ö',
7047
        '%F7' => '÷',
7048
        '%F8' => 'ø',
7049
        '%F9' => 'ù',
7050
        '%FA' => 'ú',
7051
        '%FB' => 'û',
7052
        '%FC' => 'ü',
7053
        '%FD' => 'ý',
7054
        '%FE' => 'þ',
7055
        '%FF' => 'ÿ',
7056
    );
7057
7058
    return $array;
7059
  }
7060
7061
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through "UTF8::to_utf8()", then every key of the
   * "self::$utf8ToWin1252" table is replaced by its value, and the result is handed
   * to the polyfill "Xml::utf8_decode()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // The search / replace lists are built once per request and then reused.
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$utf8ToWin1252);
      $REPLACE_CACHE = array_values(self::$utf8ToWin1252);
    }

    $utf8 = (string)self::to_utf8($str);
    $mapped = str_replace($SEARCH_CACHE, $REPLACE_CACHE, $utf8);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
7090
7091
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The native "\utf8_encode()" does the conversion; afterwards every key of the
   * "self::$cp1252ToUtf8" table found in the result is replaced by its value.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // Without a "\xC2" byte the replacement step is skipped entirely.
    // NOTE(review): presumably every key of self::$cp1252ToUtf8 contains that byte - confirm against the table.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // The search / replace lists are built once per request and then reused.
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$cp1252ToUtf8);
      $REPLACE_CACHE = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
  }
7124
7125
  /**
   * Fix utf8-win1252 chars; a thin wrapper that forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the input.</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7138
7139
  /**
   * Returns the table of utf8 whitespace characters stored in "self::$whitespaceTable".
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
7155
7156
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. When the string holds more than
   * $words of them, it is cut after the last allowed word, trailing whitespace is
   * removed and $strAddOn is appended; otherwise the string is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$words groups of "word + following whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // Something was cut off only if there is a match and it is shorter than the input.
    if (isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0])) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7191
7192
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a one-byte-per-character "mask" ('#' for an existing
   * break, ' ' for a space, '?' for anything else). The native "wordwrap()" wraps that mask,
   * and the '#' positions of the wrapped mask decide where breaks go in the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column; an empty string
   *                if $str or $break is empty.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars holds the real characters (a break counts as one element),
    // $mask holds exactly one byte per element of $chars.
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $segmentIndex => $segment) {

      // every segment but the first was preceded by a break in the input
      if ($segmentIndex) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;

        if (' ' === $char) {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {

      // move the characters in front of this break into the result
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // the element at the break position is dropped if it is an original break or a space
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7259
7260
  /**
   * Returns the array of Unicode White Space characters stored in "self::$whitespace".
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7269
7270
}
7271