Completed
Push — master ( 5ec1a9...30a158 )
by Lars
04:44
created

UTF8::strnatcmp()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 0
cts 0
cp 0
rs 10
c 0
b 0
f 0
cc 2
eloc 2
nc 2
nop 2
crap 6
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
   * Bom => Byte-Length
   *
   * Every byte order mark is listed twice: once as its raw bytes and once as the
   * UTF-8 text that results when those raw bytes were wrongly read as
   * "WINDOWS-1252" (in that form one character may take more than one byte).
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $bom = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // "ï»¿" written as escape sequences, so that tools which strip or "repair"
      // byte order marks cannot silently turn this key into something else
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading characters are kept exactly as found here - confirm they are the intended bytes
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): the two trailing characters are kept exactly as found here - confirm they are the intended bytes
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $utf8ToWin1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $utf8MSWord = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $iconvEncoding = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $support = array();
808
809 1
  /**
   * Creating an instance only triggers the environment support-check.
   *
   * @see UTF8::checkForSupport()
   */
  public function __construct()
  {
    self::checkForSupport();
  }
816
817
  /**
   * Fetch the single character found at a given position, similar to $str[1]
   * but delegated to UTF8::substr() instead of plain byte access.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for an empty input
   *                or a negative position.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;
    $index = (int)$pos;

    // nothing to look up: empty input or a position in front of the string
    if ($input === '' || $index < 0) {
      return '';
    }

    return self::substr($input, $index, 1);
  }
840 1
841
  /**
   * Make sure the given string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is
   *       returned untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // already marked -> hand the input back as it is
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
858
859
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: leading zeros carry no
   * information and the remaining digits are left-padded to full bytes,
   * e.g. "110001" and "00110001" both give "1".
   *
   * INFO: The conversion is done 8 bits at a time. Pushing the whole input
   *       through base_convert() would go via a float and corrupt everything
   *       longer than ~53 bits, and it could hand an odd number of hex digits
   *       to pack('H*'), which then pads on the wrong side.
   *
   * @param mixed $bin 1|0 <p>Only values with a string offset 0 are converted,
   *                   characters other than "0" and "1" are ignored.</p>
   *
   * @return string <p>The decoded bytes, or an empty string if the input has no
   *                offset 0 (e.g. an empty string).</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // base_convert() ignored everything that is not a binary digit, keep doing so
    $bits = (string)preg_replace('/[^01]/', '', (string)$bin);

    // the input is a number: leading zeros do not add bytes
    $bits = ltrim($bits, '0');
    if ($bits === '') {
      // same result as pack('H*', '0')
      return "\x00";
    }

    // left-pad to whole bytes, so the least significant bit stays the last bit
    $byteCount = (int)ceil(strlen($bits) / 8);
    $bits = str_pad($bits, $byteCount * 8, '0', STR_PAD_LEFT);

    $str = '';
    foreach (str_split($bits, 8) as $byte) {
      $str .= chr(bindec($byte));
    }

    return $str;
  }
874 1
875
  /**
   * Get the Byte Order Mark of UTF-8 (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
886 2
887
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
901
902
  /**
   * Detect once which UTF-8 related features are available and remember the
   * results in UTF8::$support.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection was already done, nothing left to do
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // set the marker before the detection starts
    self::$support['already_checked_via_portable_utf8'] = true;

    // mbstring -> http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // iconv -> http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // intl -> http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // IntlChar -> http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // pcre (utf-8) -> http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
929 8
930 6
  /**
   * Build the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default encoding the work is handed to "IntlChar" when it is
   * available, otherwise the UTF-8 bytes are assembled by hand and kept in a
   * static cache.
   *
   * @param int    $code_point <p>The code point for which to generate a character,
   *                           only real integers are accepted.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // anything that is not already an integer (e.g. "65" or 65.0) is rejected
    if ((int)$code_point !== $code_point) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'UTF-8') {
      if (self::$support['intlChar'] === true) {
        return \IntlChar::chr($code_point);
      }
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // use static cache, if there is no support for "IntlChar"
    static $charCache = array();
    $key = $code_point . $encoding;
    if (isset($charCache[$key]) === true) {
      return $charCache[$key];
    }

    $cp = $code_point % 0x200000;

    if ($cp < 0x80) {
      // 1 byte
      $char = chr($cp);
    } elseif ($cp < 0x800) {
      // 2 bytes
      $char = chr(0xC0 | ($cp >> 6))
              . chr(0x80 | ($cp & 0x3F));
    } elseif ($cp < 0x10000) {
      // 3 bytes
      $char = chr(0xE0 | ($cp >> 12))
              . chr(0x80 | (($cp >> 6) & 0x3F))
              . chr(0x80 | ($cp & 0x3F));
    } else {
      // 4 bytes
      $char = chr(0xF0 | ($cp >> 18))
              . chr(0x80 | (($cp >> 12) & 0x3F))
              . chr(0x80 | (($cp >> 6) & 0x3F))
              . chr(0x80 | ($cp & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $char = \mb_convert_encoding($char, $encoding, 'UTF-8');
    }

    // add into static cache
    $charCache[$key] = $char;

    return $char;
  }
989
990
  /**
   * Run a callback on every element UTF8::split() returns for the string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1004 2
1005
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>Byte length per character, in string order; an empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // nothing to measure
    if ($str === '') {
      return array();
    }

    $chars = self::split($str);

    return array_map('strlen', $chars);
  }
1027
1028 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1-4); the payload bits of the
   * lead byte and of each following byte are then combined into one integer.
   *
   * @param string $char <p>The input character (only the first character of the string is decoded).</p>
   *
   * @return int <p>The decoded value; 0 for an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // Guard: reading offset 0 of an empty string raises an "uninitialized string offset" error.
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // A truncated sequence contributes zero bits instead of reading past the end of the string.
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1067
1068
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The literal HTML entity "&#0;" is treated like an empty string.
   *
   * @param string $char <p>The input character.</p>
   * @param string $pfix [optional] <p>Prefix passed on to UTF8::int_to_hex(), default "U+".</p>
   *
   * @return string <p>The code point encoded as U+xxxx.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
1084
1085
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the given character.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1098 12
1099
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The separator is placed between chunks only; it is not appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) inserted between the chunks.</p>
   *
   * @return string <p>The chunked string.</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1112
1113
  /**
   * Strips all non-UTF-8 bytes from a string and optionally applies further normalizations.
   *
   * Steps, in this order: drop invalid bytes, remove the diamond question mark, remove
   * invisible characters, then (each only when its flag is exactly true) normalize
   * whitespace, normalize MS Word characters and remove the BOM.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    //
    // Group 1 captures runs of well-formed sequences (capped at 100 per match, an unbounded
    // repetition caused connection reset problems on larger strings); groups 2 and 3 match
    // single stray bytes. Replacing every match with "$1" keeps only the well-formed runs.
    $validSequencesPattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($validSequencesPattern, '$1', $str);

    // always applied
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    // opt-in steps
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1161 1
1162
  /**
   * Cleans up a string so that only printable UTF-8 chars remain, and fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $fixed = self::fix_simple_utf8($str);

    // UTF8::clean() with: remove BOM = true, normalize whitespace = true,
    // normalize MS Word chars = false, keep non-breaking spaces = true
    // (it also removes non UTF-8 symbols, the diamond question mark and invisible characters)
    return (string)self::clean($fixed, true, true, false, true);
  }
1189
1190
  /**
   * Converts a string, or an array of characters, into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string (it is split into characters first) or an
   *                                    array of such strings.</p>
   * @param bool            $u_style    <p>If true, code points are returned in U+xxxx format,
   *                                    by default they are returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $toCodePoint = array('\\voku\\helper\\UTF8', 'ord');
    $toHex = array('\\voku\\helper\\UTF8', 'int_to_hex');

    // a plain string is broken up into its characters, arrays are used as given
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map($toCodePoint, $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map($toHex, $codePoints);
  }
1227 1
1228
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array with the characters as keys and
   *               their number of occurrences as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split into chunks of one character each
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1241
1242
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then decoded.
   *
   * @param int $int <p>The decimal value of the character.</p>
   *
   * @return string <p>The result of decoding the numeric entity.</p>
   */
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only referenced when the running PHP version is at least 5.4
    $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1259 1
1260
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
   *
   * Conversion path:
   * - target "UTF-8" (forced, or detected as UTF-8 / WINDOWS-1252 / ISO-8859-1): UTF8::to_utf8()
   * - target "ISO-8859-1" (forced, or detected as ISO-8859-1 / UTF-8): UTF8::to_iso8859()
   * - everything else: mb_convert_encoding() from the detected encoding
   *
   * The input is returned unchanged when it or the encoding name is empty, when no encoding
   * could be detected, when $force is not true and the detected encoding already equals the
   * target, or when the conversion yields no usable string.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8), otherwise the conversion is skipped when the auto-detected encoding
   *                         already matches.</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // nothing to do: detection failed, or the string is already in the target encoding and
    // the caller did not force a re-encode
    if (
        !$encodingDetected
        ||
        (
            $force !== true
            &&
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    // warn (but still try) when the target is neither UTF-8 nor WINDOWS-1252 and mbstring is missing
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // A plain truthiness check would reject the valid result "0" and hand back the
    // unconverted input, so only a non-string or empty result counts as failure.
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1350
1351
  /**
1352
   * Reads entire file into a string.
1353
   *
1354
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1355
   *
1356
   * @link http://php.net/manual/en/function.file-get-contents.php
1357
   *
1358
   * @param string        $filename      <p>
1359
   *                                     Name of the file to read.
1360
   *                                     </p>
1361
   * @param int|null      $flags         [optional] <p>
1362 2
   *                                     Prior to PHP 6, this parameter is called
1363
   *                                     use_include_path and is a bool.
1364
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1365 2
   *                                     to trigger include path
1366 2
   *                                     search.
1367
   *                                     </p>
1368 2
   *                                     <p>
1369 2
   *                                     The value of flags can be any combination of
1370
   *                                     the following flags (with some restrictions), joined with the
1371
   *                                     binary OR (|)
1372
   *                                     operator.
1373 2
   *                                     </p>
1374 2
   *                                     <p>
1375
   *                                     <table>
1376 2
   *                                     Available flags
1377 2
   *                                     <tr valign="top">
1378
   *                                     <td>Flag</td>
1379 2
   *                                     <td>Description</td>
1380 1
   *                                     </tr>
1381 1
   *                                     <tr valign="top">
1382 2
   *                                     <td>
1383
   *                                     FILE_USE_INCLUDE_PATH
1384
   *                                     </td>
1385
   *                                     <td>
1386 2
   *                                     Search for filename in the include directory.
1387 1
   *                                     See include_path for more
1388
   *                                     information.
1389
   *                                     </td>
1390 1
   *                                     </tr>
1391 1
   *                                     <tr valign="top">
1392 1
   *                                     <td>
1393 1
   *                                     FILE_TEXT
1394
   *                                     </td>
1395 1
   *                                     <td>
1396
   *                                     As of PHP 6, the default encoding of the read
1397
   *                                     data is UTF-8. You can specify a different encoding by creating a
1398
   *                                     custom context or by changing the default using
1399
   *                                     stream_default_encoding. This flag cannot be
1400
   *                                     used with FILE_BINARY.
1401
   *                                     </td>
1402
   *                                     </tr>
1403
   *                                     <tr valign="top">
1404
   *                                     <td>
1405 1
   *                                     FILE_BINARY
1406
   *                                     </td>
1407 1
   *                                     <td>
1408
   *                                     With this flag, the file is read in binary mode. This is the default
1409
   *                                     setting and cannot be used with FILE_TEXT.
1410
   *                                     </td>
1411
   *                                     </tr>
1412
   *                                     </table>
1413
   *                                     </p>
1414
   * @param resource|null $context       [optional] <p>
1415
   *                                     A valid context resource created with
1416
   *                                     stream_context_create. If you don't need to use a
1417
   *                                     custom context, you can skip this parameter by &null;.
1418
   *                                     </p>
1419 9
   * @param int|null      $offset        [optional] <p>
1420
   *                                     The offset where the reading starts.
1421 9
   *                                     </p>
1422 9
   * @param int|null      $maxlen        [optional] <p>
1423 3
   *                                     Maximum length of data read. The default is to read until end
1424
   *                                     of file is reached.
1425 3
   *                                     </p>
1426 3
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1427 3
   *
1428 9
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1429 2
   *                                     or pdf, because they used non default utf-8 chars</p>
1430 2
   *
1431 2
   * @return string|false <p>The function returns the read data or false on failure.</p>
1432 2
   */
1433 9
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and rewrites some
    // characters of the given name — confirm this is still wanted for file names.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // The native function takes a bool and an int here. Handing the null defaults through
    // relies on implicit coercion (deprecated for built-in functions as of PHP 8.1), so
    // coerce explicitly to the very same values.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    // Without a caller-supplied context, build one that carries the HTTP timeout.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // Only pass a length when the caller really gave one.
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // convert (not forced) to UTF-8, then strip everything UTF8::cleanup() removes
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1468
1469
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with PHP's native file_get_contents() (the unqualified call
   * does not resolve to the method of the same name in this class) and then handed to
   * UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (including when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $contents = file_get_contents($file_path);

    // unreadable file: there is no content to inspect, hence no BOM
    if ($contents === false) {
      return false;
    }

    return self::string_has_bom($contents);
  }
1480
1481
  /**
   * Normalizes a value to the given Unicode normalization form (NFC by default).
   *
   * - arrays: every element is filtered recursively, the filtered array is returned
   * - objects: every iterable property is filtered recursively and written back to the same object
   * - strings: "\r\n" and "\r" become "\n"; non-ASCII strings are normalized via \Normalizer,
   *   falling back to UTF8::encode('UTF-8', ...) when normalization yields nothing
   * - every other type is returned untouched
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Normalization form handed to \Normalizer.</p>
   * @param string $leading_combining  <p>Prefix put in front of a string that starts with a
   *                                   combining mark; an empty string disables this.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder: marks "already normalized" so the check further down still sees a non-empty value
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // normalization produced nothing usable => re-encode the input instead
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1542 1
1543 1
  /**
1544 1
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1545 1
   *
1546
   * Gets a specific external variable by name and optionally filters it
1547
   *
1548 1
   * @link  http://php.net/manual/en/function.filter-input.php
1549
   *
1550
   * @param int    $type          <p>
1551
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1552
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1553
   *                              <b>INPUT_ENV</b>.
1554
   *                              </p>
1555
   * @param string $variable_name <p>
1556
   *                              Name of a variable to get.
1557
   *                              </p>
1558
   * @param int    $filter        [optional] <p>
1559 1
   *                              The ID of the filter to apply. The
1560
   *                              manual page lists the available filters.
1561 1
   *                              </p>
1562
   * @param mixed  $options       [optional] <p>
1563
   *                              Associative array of options or bitwise disjunction of flags. If filter
1564
   *                              accepts options, flags can be provided in "flags" field of array.
1565
   *                              </p>
1566
   *
1567
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1568
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1569
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1570
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1571
   * @since 5.2.0
1572
   */
1573 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded to the native function when the caller actually passed it
    if (func_num_args() >= 4) {
      $value = filter_input($type, $variable_name, $filter, $options);
    } else {
      $value = filter_input($type, $variable_name, $filter);
    }

    // run the native result through UTF8::filter()
    return self::filter($value);
  }
1583
1584 7
  /**
1585 2
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1586
   *
1587
   * Gets external variables and optionally filters them
1588 7
   *
1589 1
   * @link  http://php.net/manual/en/function.filter-input-array.php
1590 1
   *
1591 1
   * @param int   $type       <p>
1592
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1593 7
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1594
   *                          <b>INPUT_ENV</b>.
1595
   *                          </p>
1596
   * @param mixed $definition [optional] <p>
1597
   *                          An array defining the arguments. A valid key is a string
1598
   *                          containing a variable name and a valid value is either a filter type, or an array
1599
   *                          optionally specifying the filter, flags and options. If the value is an
1600
   *                          array, valid keys are filter which specifies the
1601
   *                          filter type,
1602
   *                          flags which specifies any flags that apply to the
1603 1
   *                          filter, and options which specifies any options that
1604
   *                          apply to the filter. See the example below for a better understanding.
1605 1
   *                          </p>
1606
   *                          <p>
1607 1
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1608
   *                          input array are filtered by this filter.
1609
   *                          </p>
1610 1
   * @param bool  $add_empty  [optional] <p>
1611 1
   *                          Add missing keys as <b>NULL</b> to the return value.
1612
   *                          </p>
1613 1
   *
1614
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1615
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1616 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1617 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1618 1
   * fails.
1619 1
   * @since 5.2.0
1620 1
   */
1621 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // $definition / $add_empty are only forwarded when the caller passed more than $type
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    // run the native result through UTF8::filter()
    return self::filter($values);
  }
1631
1632 1
  /**
1633
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1634 1
   *
1635
   * Filters a variable with a specified filter
1636
   *
1637
   * @link  http://php.net/manual/en/function.filter-var.php
1638 1
   *
1639
   * @param mixed $variable <p>
1640
   *                        Value to filter.
1641
   *                        </p>
1642
   * @param int   $filter   [optional] <p>
1643
   *                        The ID of the filter to apply. The
1644
   *                        manual page lists the available filters.
1645
   *                        </p>
1646
   * @param mixed $options  [optional] <p>
1647
   *                        Associative array of options or bitwise disjunction of flags. If filter
1648
   *                        accepts options, flags can be provided in "flags" field of array. For
1649
   *                        the "callback" filter, callable type should be passed. The
1650
   *                        callback must accept one argument, the value to be filtered, and return
1651
   *                        the value after filtering/sanitizing it.
1652
   *                        </p>
1653
   *                        <p>
1654 1
   *                        <code>
1655
   *                        // for filters that accept options, use this format
1656 1
   *                        $options = array(
1657 1
   *                        'options' => array(
1658
   *                        'default' => 3, // value to return if the filter fails
1659
   *                        // other options here
1660 1
   *                        'min_range' => 0
1661
   *                        ),
1662 1
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1663 1
   *                        );
1664 1
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1665 1
   *                        // for filter that only accept flags, you can pass them directly
1666 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1667 1
   *                        // for filter that only accept flags, you can also pass as an array
1668 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1669 1
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1670 1
   *                        // callback validate filter
1671 1
   *                        function foo($value)
1672 1
   *                        {
1673
   *                        // Expected format: Surname, GivenNames
1674
   *                        if (strpos($value, ", ") === false) return false;
1675
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1676
   *                        $empty = (empty($surname) || empty($givennames));
1677
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1678
   *                        if ($empty || $notstrings) {
1679
   *                        return false;
1680
   *                        } else {
1681
   *                        return $value;
1682
   *                        }
1683
   *                        }
1684
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1685
   *                        </code>
1686
   *                        </p>
1687
   *
1688
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1689
   * @since 5.2.0
1690
   */
1691 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Only hand "$options" to the native function when the caller really
    // supplied a third argument; otherwise the native default is used.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // Post-process the native result with self::filter().
    return self::filter($filtered);
  }
1701
1702
  /**
1703
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1704
   *
1705
   * Gets multiple variables and optionally filters them
1706
   *
1707
   * @link  http://php.net/manual/en/function.filter-var-array.php
1708
   *
1709
   * @param array $data       <p>
1710
   *                          An array with string keys containing the data to filter.
1711
   *                          </p>
1712
   * @param mixed $definition [optional] <p>
1713
   *                          An array defining the arguments. A valid key is a string
1714
   *                          containing a variable name and a valid value is either a
1715
   *                          filter type, or an
1716
   *                          array optionally specifying the filter, flags and options.
1717
   *                          If the value is an array, valid keys are filter
1718
   *                          which specifies the filter type,
1719
   *                          flags which specifies any flags that apply to the
1720
   *                          filter, and options which specifies any options that
1721
   *                          apply to the filter. See the example below for a better understanding.
1722
   *                          </p>
1723
   *                          <p>
1724
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1725
   *                          input array are filtered by this filter.
1726
   *                          </p>
1727
   * @param bool  $add_empty  [optional] <p>
1728
   *                          Add missing keys as <b>NULL</b> to the return value.
1729
   *                          </p>
1730
   *
1731
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1732
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1733
   * the variable is not set.
1734
   * @since 5.2.0
1735
   */
1736 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called with its own
    // defaults; otherwise all three values are forwarded unchanged.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // Post-process the native result with self::filter().
    return self::filter($filtered);
  }
1746
1747
  /**
1748
   * Check if the number of unicode characters is not more than the specified integer.
1749
   *
1750
   * @param string $str      The original string to be checked.
1751
   * @param int    $box_size The size in number of chars to be checked against string.
1752 1
   *
1753
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1754 1
   */
1755 1
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared against the box size.
    $length = self::strlen($str);

    return ($length <= $box_size);
  }
1759
1760
  /**
1761
   * Try to fix simple broken UTF-8 strings.
1762
   *
1763
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1764
   *
1765
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1766
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1767
   * See: http://en.wikipedia.org/wiki/Windows-1252
1768
   *
1769
   * @param string $str <p>The input string</p>
1770
   *
1771
   * @return string
1772 1
   */
1773 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists are derived from self::$brokenUtf8ToUtf8 once
    // and then re-used by every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1792 1
1793 1
  /**
1794
   * Fix a double (or multiple) encoded UTF8 string.
1795
   *
1796 1
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1797
   *
1798
   * @return mixed
1799
   */
1800
  public static function fix_utf8($str)
  {
    if (!is_array($str)) {
      // Decode and re-encode until the value no longer changes, i.e. until
      // every additional layer of encoding has been removed.
      $previous = '';
      while ($previous !== $str) {
        $previous = $str;
        $decoded = self::utf8_decode($str);
        $str = self::to_utf8($decoded);
      }

      return $str;
    }

    // Arrays are processed element by element (recursively), keys are kept.
    $fixed = array();
    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      $fixed[$key] = self::fix_utf8($value);
    }

    return $fixed;
  }
1824
1825
  /**
1826 2
   * Get the text direction ('LTR' or 'RTL') of a specific character.
1827
   *
1828
   * @param string $char
1829 2
   *
1830
   * @return string <p>'RTL' or 'LTR'</p>
1831 2
   */
1832 2
  public static function getCharDirection($char)
  {
    // Run the support detection first if it has not been done yet.
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $icuDirection = \IntlChar::charDirection($char);

      // numeric direction classes from the "IntlChar"-Class
      if (in_array($icuDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($icuDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // every other class falls through to the code point table below
    }

    $cp = static::chr_to_decimal($char);

    // Everything outside of 0x5be - 0x10b7f is reported as left-to-right.
    if (!($cp >= 0x5be && $cp <= 0x10b7f)) {
      return 'LTR';
    }

    if ($cp <= 0x85e) {

      $isRtl = (
          $cp === 0x5be ||
          $cp === 0x5c0 ||
          $cp === 0x5c3 ||
          $cp === 0x5c6 ||
          ($cp >= 0x5d0 && $cp <= 0x5ea) ||
          ($cp >= 0x5f0 && $cp <= 0x5f4) ||
          $cp === 0x608 ||
          $cp === 0x60b ||
          $cp === 0x60d ||
          $cp === 0x61b ||
          ($cp >= 0x61e && $cp <= 0x64a) ||
          ($cp >= 0x66d && $cp <= 0x66f) ||
          ($cp >= 0x671 && $cp <= 0x6d5) ||
          ($cp >= 0x6e5 && $cp <= 0x6e6) ||
          ($cp >= 0x6ee && $cp <= 0x6ef) ||
          ($cp >= 0x6fa && $cp <= 0x70d) ||
          $cp === 0x710 ||
          ($cp >= 0x712 && $cp <= 0x72f) ||
          ($cp >= 0x74d && $cp <= 0x7a5) ||
          $cp === 0x7b1 ||
          ($cp >= 0x7c0 && $cp <= 0x7ea) ||
          ($cp >= 0x7f4 && $cp <= 0x7f5) ||
          $cp === 0x7fa ||
          ($cp >= 0x800 && $cp <= 0x815) ||
          $cp === 0x81a ||
          $cp === 0x824 ||
          $cp === 0x828 ||
          ($cp >= 0x830 && $cp <= 0x83e) ||
          ($cp >= 0x840 && $cp <= 0x858) ||
          $cp === 0x85e
      );

    } elseif ($cp === 0x200f) {

      $isRtl = true;

    } elseif ($cp >= 0xfb1d) {

      $isRtl = (
          $cp === 0xfb1d ||
          ($cp >= 0xfb1f && $cp <= 0xfb28) ||
          ($cp >= 0xfb2a && $cp <= 0xfb36) ||
          ($cp >= 0xfb38 && $cp <= 0xfb3c) ||
          $cp === 0xfb3e ||
          ($cp >= 0xfb40 && $cp <= 0xfb41) ||
          ($cp >= 0xfb43 && $cp <= 0xfb44) ||
          ($cp >= 0xfb46 && $cp <= 0xfbc1) ||
          ($cp >= 0xfbd3 && $cp <= 0xfd3d) ||
          ($cp >= 0xfd50 && $cp <= 0xfd8f) ||
          ($cp >= 0xfd92 && $cp <= 0xfdc7) ||
          ($cp >= 0xfdf0 && $cp <= 0xfdfc) ||
          ($cp >= 0xfe70 && $cp <= 0xfe74) ||
          ($cp >= 0xfe76 && $cp <= 0xfefc) ||
          ($cp >= 0x10800 && $cp <= 0x10805) ||
          $cp === 0x10808 ||
          ($cp >= 0x1080a && $cp <= 0x10835) ||
          ($cp >= 0x10837 && $cp <= 0x10838) ||
          $cp === 0x1083c ||
          ($cp >= 0x1083f && $cp <= 0x10855) ||
          ($cp >= 0x10857 && $cp <= 0x1085f) ||
          ($cp >= 0x10900 && $cp <= 0x1091b) ||
          ($cp >= 0x10920 && $cp <= 0x10939) ||
          $cp === 0x1093f ||
          $cp === 0x10a00 ||
          ($cp >= 0x10a10 && $cp <= 0x10a13) ||
          ($cp >= 0x10a15 && $cp <= 0x10a17) ||
          ($cp >= 0x10a19 && $cp <= 0x10a33) ||
          ($cp >= 0x10a40 && $cp <= 0x10a47) ||
          ($cp >= 0x10a50 && $cp <= 0x10a58) ||
          ($cp >= 0x10a60 && $cp <= 0x10a7f) ||
          ($cp >= 0x10b00 && $cp <= 0x10b35) ||
          ($cp >= 0x10b40 && $cp <= 0x10b55) ||
          ($cp >= 0x10b58 && $cp <= 0x10b72) ||
          ($cp >= 0x10b78 && $cp <= 0x10b7f)
      );

    } else {

      // between the two tables (and not 0x200f): nothing is listed as RTL
      $isRtl = false;

    }

    return $isRtl ? 'RTL' : 'LTR';
  }
1944 9
1945 9
  /**
1946 9
   * Get data from "/data/*.php".
1947 6
   *
1948
   * @param string $file
1949
   *
1950 9
   * @return bool|string|array|int <p>Will return false on error.</p>
1951 2
   */
1952 2
  private static function getData($file)
  {
    // The data files live next to this class: "<dir>/data/<name>.php".
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    // The result is whatever the required file returns.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1962
1963 9
  /**
1964
   * alias for "UTF8::string_has_bom()"
1965 9
   *
1966 9
   * @see UTF8::string_has_bom()
1967
   *
1968 7
   * @param string $str
1969
   *
1970 7
   * @return bool
1971 6
   *
1972
   * @deprecated
1973 4
   */
1974
  public static function hasBom($str)
  {
    // Deprecated alias: the check itself is done by self::string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1978
1979
  /**
1980 9
   * Converts a hexadecimal-value into an UTF-8 character.
1981 9
   *
1982 9
   * @param string $hexdec <p>The hexadecimal value.</p>
1983
   *
1984 9
   * @return string|false <p>One single UTF-8 character.</p>
1985
   */
1986 9
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> decimal number -> character
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
1990
1991
  /**
1992
   * Converts hexadecimal U+xxxx code point representation to integer.
1993
   *
1994
   * INFO: opposite to UTF8::int_to_hex()
1995
   *
1996
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
1997
   *
1998
   * @return int|false <p>The code point, or false on failure.</p>
1999
   */
2000
  public static function hex_to_int($hexdec)
  {
    // Empty / falsy input can never be a valid code point representation.
    if (!$hexdec) {
      return false;
    }

    // Accepted forms: "\uXXXX", "U+XXXX" or the bare digits (4 to 6 of them).
    // Only real hexadecimal digits are allowed: with a wider character class
    // intval() would silently turn e.g. "zzzz" into 0 or "12gh" into 0x12
    // instead of reporting a failure.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2012
2013
  /**
2014
   * alias for "UTF8::html_entity_decode()"
2015
   *
2016
   * @see UTF8::html_entity_decode()
2017
   *
2018
   * @param string $str
2019
   * @param int    $flags
2020
   * @param string $encoding
2021
   *
2022
   * @return string
2023
   */
2024
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Alias: all arguments are passed through to self::html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2028
2029
  /**
2030
   * Converts a UTF-8 string to a series of HTML numbered entities.
2031
   *
2032
   * INFO: opposite to UTF8::html_decode()
2033
   *
2034
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2035
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2036
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2037
   *
2038
   * @return string <p>HTML numbered entities.</p>
2039
   */
2040
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // With "$keepAsciiChars" the converted range starts above ASCII.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map consists of groups of exactly four integers
      // (start code, end code, offset, mask). A stray fifth element is
      // rejected by newer PHP versions, so only one complete group is passed.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without "mbstring": encode character by character.
    // The class is addressed by name inside the closure instead of "self".
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2078
2079
  /**
2080
   * UTF-8 version of html_entity_decode()
2081
   *
2082
   * The reason we are not using html_entity_decode() by itself is because
2083
   * while it is not technically correct to leave out the semicolon
2084
   * at the end of an entity most browsers will still interpret the entity
2085
   * correctly. html_entity_decode() does not convert entities without
2086
   * semicolons, so we are left with our own little solution here. Bummer.
2087
   *
2088
   * Convert all HTML entities to their applicable characters
2089
   *
2090
   * INFO: opposite to UTF8::html_encode()
2091
   *
2092
   * @link http://php.net/manual/en/function.html-entity-decode.php
2093
   *
2094 2
   * @param string $str      <p>
2095
   *                         The input string.
2096 2
   *                         </p>
2097 1
   * @param int    $flags    [optional] <p>
2098 1
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2099
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2100 2
   *                         <table>
2101
   *                         Available <i>flags</i> constants
2102 2
   *                         <tr valign="top">
2103 1
   *                         <td>Constant Name</td>
2104
   *                         <td>Description</td>
2105
   *                         </tr>
2106 2
   *                         <tr valign="top">
2107 2
   *                         <td><b>ENT_COMPAT</b></td>
2108 2
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2109 2
   *                         </tr>
2110 2
   *                         <tr valign="top">
2111 1
   *                         <td><b>ENT_QUOTES</b></td>
2112
   *                         <td>Will convert both double and single quotes.</td>
2113 1
   *                         </tr>
2114 1
   *                         <tr valign="top">
2115 1
   *                         <td><b>ENT_NOQUOTES</b></td>
2116 1
   *                         <td>Will leave both double and single quotes unconverted.</td>
2117 1
   *                         </tr>
2118 2
   *                         <tr valign="top">
2119
   *                         <td><b>ENT_HTML401</b></td>
2120 2
   *                         <td>
2121
   *                         Handle code as HTML 4.01.
2122
   *                         </td>
2123
   *                         </tr>
2124
   *                         <tr valign="top">
2125
   *                         <td><b>ENT_XML1</b></td>
2126
   *                         <td>
2127
   *                         Handle code as XML 1.
2128
   *                         </td>
2129
   *                         </tr>
2130
   *                         <tr valign="top">
2131
   *                         <td><b>ENT_XHTML</b></td>
2132
   *                         <td>
2133
   *                         Handle code as XHTML.
2134
   *                         </td>
2135
   *                         </tr>
2136
   *                         <tr valign="top">
2137
   *                         <td><b>ENT_HTML5</b></td>
2138
   *                         <td>
2139
   *                         Handle code as HTML 5.
2140
   *                         </td>
2141
   *                         </tr>
2142
   *                         </table>
2143
   *                         </p>
2144
   * @param string $encoding [optional] <p>Encoding to use.</p>
2145
   *
2146
   * @return string <p>The decoded string.</p>
2147
   */
2148
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Strings shorter than four bytes are returned untouched
    // (examples: "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Cheap pre-check: without "&", or without both "&#" and ";", there is
    // nothing to decode.
    $mayContainEntity = strpos($str, '&') !== false
                        &&
                        (
                            strpos($str, '&#') !== false
                            ||
                            strpos($str, ';') !== false
                        );
    if (!$mayContainEntity) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      // "ENT_HTML5" is only used when Bootup::is_php('5.4') reports true.
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // Repeat until a pass no longer changes the string, so that nested
    // (multiple times encoded) entities are resolved as well.
    do {
      $before = $str;

      // decimal entities; results that are a plain quote stay encoded here
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($entity) use ($encoding) {
            $char = \mb_convert_encoding($entity[0], $encoding, 'HTML-ENTITIES');

            if ($char === '"' || $char === "'") {
              return $entity[0];
            }

            return $char;
          },
          $str
      );

      // append the missing semicolon to numeric entities ...
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2213
2214
  /**
2215
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2216
   *
2217
   * @link http://php.net/manual/en/function.htmlentities.php
2218
   *
2219
   * @param string $str           <p>
2220
   *                              The input string.
2221
   *                              </p>
2222
   * @param int    $flags         [optional] <p>
2223
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2224
   *                              invalid code unit sequences and the used document type. The default is
2225
   *                              ENT_COMPAT | ENT_HTML401.
2226
   *                              <table>
2227
   *                              Available <i>flags</i> constants
2228
   *                              <tr valign="top">
2229
   *                              <td>Constant Name</td>
2230
   *                              <td>Description</td>
2231
   *                              </tr>
2232 1
   *                              <tr valign="top">
2233
   *                              <td><b>ENT_COMPAT</b></td>
2234 1
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2235
   *                              </tr>
2236
   *                              <tr valign="top">
2237
   *                              <td><b>ENT_QUOTES</b></td>
2238 1
   *                              <td>Will convert both double and single quotes.</td>
2239
   *                              </tr>
2240
   *                              <tr valign="top">
2241
   *                              <td><b>ENT_NOQUOTES</b></td>
2242
   *                              <td>Will leave both double and single quotes unconverted.</td>
2243
   *                              </tr>
2244
   *                              <tr valign="top">
2245
   *                              <td><b>ENT_IGNORE</b></td>
2246 1
   *                              <td>
2247
   *                              Silently discard invalid code unit sequences instead of returning
2248 1
   *                              an empty string. Using this flag is discouraged as it
2249
   *                              may have security implications.
2250
   *                              </td>
2251
   *                              </tr>
2252
   *                              <tr valign="top">
2253
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2254
   *                              <td>
2255
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2256
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2257
   *                              </td>
2258
   *                              </tr>
2259
   *                              <tr valign="top">
2260
   *                              <td><b>ENT_DISALLOWED</b></td>
2261 3
   *                              <td>
2262
   *                              Replace invalid code points for the given document type with a
2263 3
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2264 3
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2265
   *                              instance, to ensure the well-formedness of XML documents with
2266 3
   *                              embedded external content.
2267
   *                              </td>
2268 3
   *                              </tr>
2269
   *                              <tr valign="top">
2270
   *                              <td><b>ENT_HTML401</b></td>
2271
   *                              <td>
2272
   *                              Handle code as HTML 4.01.
2273
   *                              </td>
2274
   *                              </tr>
2275
   *                              <tr valign="top">
2276
   *                              <td><b>ENT_XML1</b></td>
2277
   *                              <td>
2278
   *                              Handle code as XML 1.
2279 1
   *                              </td>
2280
   *                              </tr>
2281 1
   *                              <tr valign="top">
2282
   *                              <td><b>ENT_XHTML</b></td>
2283
   *                              <td>
2284
   *                              Handle code as XHTML.
2285
   *                              </td>
2286
   *                              </tr>
2287
   *                              <tr valign="top">
2288
   *                              <td><b>ENT_HTML5</b></td>
2289 2
   *                              <td>
2290
   *                              Handle code as HTML 5.
2291 2
   *                              </td>
2292
   *                              </tr>
2293
   *                              </table>
2294
   *                              </p>
2295
   * @param string $encoding      [optional] <p>
2296
   *                              Like <b>htmlspecialchars</b>,
2297
   *                              <b>htmlentities</b> takes an optional third argument
2298
   *                              <i>encoding</i> which defines encoding used in
2299
   *                              conversion.
2300
   *                              Although this argument is technically optional, you are highly
2301
   *                              encouraged to specify the correct value for your code.
2302
   *                              </p>
2303 2
   * @param bool   $double_encode [optional] <p>
2304
   *                              When <i>double_encode</i> is turned off PHP will not
2305 2
   *                              encode existing html entities. The default is to convert everything.
2306
   *                              </p>
2307
   *
2308
   *
2309
   * @return string the encoded string.
2310
   * </p>
2311
   * <p>
2312
   * If the input <i>string</i> contains an invalid code unit
2313
   * sequence within the given <i>encoding</i> an empty string
2314
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2315
   * <b>ENT_SUBSTITUTE</b> flags are set.
2316
   */
2317 1
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    // Checked again after the normalization above: only UTF-8 output gets
    // the extra treatment below.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes together with
    // its numbered entity.
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($encoded) as $index => $size) {
      if ($size >= 3) {
        $char = self::access($encoded, $index);

        if (isset($entities[$char])) {
          continue;
        }

        $needles[$char] = $char;
        $entities[$char] = self::html_encode($char);
      }
    }

    return str_replace($needles, $entities, $encoded);
  }
2345
2346
  /**
2347
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2348
   *
2349
   * INFO: Take a look at "UTF8::htmlentities()"
2350
   *
2351
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2352
   *
2353
   * @param string $str           <p>
2354
   *                              The string being converted.
2355
   *                              </p>
2356
   * @param int    $flags         [optional] <p>
2357
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2358
   *                              invalid code unit sequences and the used document type. The default is
2359 1
   *                              ENT_COMPAT | ENT_HTML401.
2360
   *                              <table>
2361 1
   *                              Available <i>flags</i> constants
2362
   *                              <tr valign="top">
2363
   *                              <td>Constant Name</td>
2364
   *                              <td>Description</td>
2365
   *                              </tr>
2366
   *                              <tr valign="top">
2367
   *                              <td><b>ENT_COMPAT</b></td>
2368
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2369
   *                              </tr>
2370
   *                              <tr valign="top">
2371
   *                              <td><b>ENT_QUOTES</b></td>
2372
   *                              <td>Will convert both double and single quotes.</td>
2373
   *                              </tr>
2374
   *                              <tr valign="top">
2375
   *                              <td><b>ENT_NOQUOTES</b></td>
2376
   *                              <td>Will leave both double and single quotes unconverted.</td>
2377
   *                              </tr>
2378
   *                              <tr valign="top">
2379
   *                              <td><b>ENT_IGNORE</b></td>
2380
   *                              <td>
2381
   *                              Silently discard invalid code unit sequences instead of returning
2382
   *                              an empty string. Using this flag is discouraged as it
2383
   *                              may have security implications.
2384
   *                              </td>
2385
   *                              </tr>
2386
   *                              <tr valign="top">
2387 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2388
   *                              <td>
2389 1
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2390
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2391
   *                              </td>
2392
   *                              </tr>
2393
   *                              <tr valign="top">
2394
   *                              <td><b>ENT_DISALLOWED</b></td>
2395
   *                              <td>
2396
   *                              Replace invalid code points for the given document type with a
2397
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2398
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2399
   *                              instance, to ensure the well-formedness of XML documents with
2400
   *                              embedded external content.
2401 1
   *                              </td>
2402
   *                              </tr>
2403 1
   *                              <tr valign="top">
2404
   *                              <td><b>ENT_HTML401</b></td>
2405
   *                              <td>
2406
   *                              Handle code as HTML 4.01.
2407
   *                              </td>
2408
   *                              </tr>
2409
   *                              <tr valign="top">
2410
   *                              <td><b>ENT_XML1</b></td>
2411
   *                              <td>
2412
   *                              Handle code as XML 1.
2413
   *                              </td>
2414
   *                              </tr>
2415
   *                              <tr valign="top">
2416 16
   *                              <td><b>ENT_XHTML</b></td>
2417
   *                              <td>
2418 16
   *                              Handle code as XHTML.
2419
   *                              </td>
2420
   *                              </tr>
2421
   *                              <tr valign="top">
2422
   *                              <td><b>ENT_HTML5</b></td>
2423
   *                              <td>
2424
   *                              Handle code as HTML 5.
2425
   *                              </td>
2426
   *                              </tr>
2427
   *                              </table>
2428
   *                              </p>
2429
   * @param string $encoding      [optional] <p>
2430
   *                              Defines encoding used in conversion.
2431 28
   *                              </p>
2432
   *                              <p>
2433 28
   *                              For the purposes of this function, the encodings
2434
   *                              ISO-8859-1, ISO-8859-15,
2435 28
   *                              UTF-8, cp866,
2436 5
   *                              cp1251, cp1252, and
2437
   *                              KOI8-R are effectively equivalent, provided the
2438
   *                              <i>string</i> itself is valid for the encoding, as
2439 28
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2440
   *                              the same positions in all of these encodings.
2441
   *                              </p>
2442
   * @param bool   $double_encode [optional] <p>
2443
   *                              When <i>double_encode</i> is turned off PHP will not
2444
   *                              encode existing html entities, the default is to convert everything.
2445
   *                              </p>
2446
   *
2447
   * @return string The converted string.
2448
   * </p>
2449 1
   * <p>
2450
   * If the input <i>string</i> contains an invalid code unit
2451 1
   * sequence within the given <i>encoding</i> an empty string
2452
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2453 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2454 1
   */
2455
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // 'UTF-8' is passed through as given; any other name is normalized first.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    // Plain function call (not this static method): delegates the conversion.
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2463
2464
  /**
2465
   * Checks whether iconv is available on the server.
2466
   *
2467
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2468
   */
2469
  public static function iconv_loaded()
  {
    $return = extension_loaded('iconv') === true;

    // INFO: "default_charset" is already set by the "Bootup"-class

    // iconv_set_encoding() is provided by the iconv extension, so the legacy
    // setup below must only run when the extension is actually loaded.
    if ($return === true && !Bootup::is_php('5.6')) {
      // INFO: "iconv_set_encoding" is deprecated since PHP 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2484
2485 15
  /**
2486
   * alias for "UTF8::decimal_to_chr()"
2487
   *
2488
   * @see UTF8::decimal_to_chr()
2489
   *
2490
   * @param int $int
2491
   *
2492
   * @return string
2493
   */
2494
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself is done by decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2498
2499
  /**
2500
   * Converts Integer to hexadecimal U+xxxx code point representation.
2501
   *
2502
   * INFO: opposite to UTF8::hex_to_int()
2503
   *
2504
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2505
   * @param string $pfix [optional]
2506
   *
2507
   * @return string <p>The code point, or empty string on failure.</p>
2508
   */
2509
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits counts as failure.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Zero-pad on the left to at least four hex digits; longer values stay as they are.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2521
2522
  /**
2523
   * Checks whether intl-char is available on the server.
2524
   *
2525
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2526
   */
2527
  public static function intlChar_loaded()
  {
    // The version check comes first; class_exists() is only consulted when it passes.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2531
2532
  /**
2533
   * Checks whether intl is available on the server.
2534
   *
2535
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2536 1
   */
2537
  public static function intl_loaded()
  {
    // Reports whether the "intl" extension is loaded, as a strict boolean.
    $loaded = extension_loaded('intl');

    return $loaded === true;
  }
2541
2542
  /**
2543
   * alias for "UTF8::is_ascii()"
2544
   *
2545 1
   * @see UTF8::is_ascii()
2546
   *
2547 1
   * @param string $str
2548
   *
2549 1
   * @return boolean
2550 1
   *
2551
   * @deprecated
2552 1
   */
2553
  public static function isAscii($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2557
2558
  /**
2559
   * alias for "UTF8::is_base64()"
2560
   *
2561
   * @see UTF8::is_base64()
2562
   *
2563 1
   * @param string $str
2564
   *
2565 1
   * @return bool
2566
   *
2567 1
   * @deprecated
2568
   */
2569
  public static function isBase64($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2573 1
2574 1
  /**
2575 1
   * alias for "UTF8::is_binary()"
2576 1
   *
2577
   * @see UTF8::is_binary()
2578 1
   *
2579
   * @param string $str
2580
   *
2581
   * @return bool
2582
   *
2583
   * @deprecated
2584
   */
2585
  public static function isBinary($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2589
2590
  /**
2591
   * alias for "UTF8::is_bom()"
2592
   *
2593 4
   * @see UTF8::is_bom()
2594
   *
2595 4
   * @param string $utf8_chr
2596
   *
2597 4
   * @return boolean
2598
   *
2599 4
   * @deprecated
2600 4
   */
2601 4
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias; forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2605 4
2606 4
  /**
2607 4
   * alias for "UTF8::is_html()"
2608 2
   *
2609 2
   * @see UTF8::is_html()
2610 4
   *
2611 4
   * @param string $str
2612 4
   *
2613
   * @return boolean
2614 4
   *
2615 4
   * @deprecated
2616 4
   */
2617 4
  public static function isHtml($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2621 4
2622 4
  /**
2623 3
   * alias for "UTF8::is_json()"
2624 3
   *
2625 4
   * @see UTF8::is_json()
2626 4
   *
2627 4
   * @param string $str
2628
   *
2629 4
   * @return bool
2630 3
   *
2631 2
   * @deprecated
2632
   */
2633 3
  public static function isJson($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2637 3
2638
  /**
2639 3
   * alias for "UTF8::is_utf16()"
2640
   *
2641
   * @see UTF8::is_utf16()
2642
   *
2643
   * @param string $str
2644
   *
2645
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2646
   *
2647
   * @deprecated
2648
   */
2649
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2653 3
2654
  /**
2655 3
   * alias for "UTF8::is_utf32()"
2656
   *
2657 3
   * @see UTF8::is_utf32()
2658
   *
2659 3
   * @param string $str
2660 3
   *
2661 3
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2662 3
   *
2663 3
   * @deprecated
2664 3
   */
2665 3
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2669 1
2670 3
  /**
2671 3
   * alias for "UTF8::is_utf8()"
2672 3
   *
2673
   * @see UTF8::is_utf8()
2674 3
   *
2675 3
   * @param string $str
2676 3
   * @param bool   $strict
2677 3
   *
2678 3
   * @return bool
2679 3
   *
2680 3
   * @deprecated
2681 3
   */
2682 3
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias; forwards both arguments unchanged to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2686 3
2687 3
  /**
2688
   * Checks if a string is 7 bit ASCII.
2689 3
   *
2690 1
   * @param string $str <p>The string to check.</p>
2691 1
   *
2692
   * @return bool <p>
2693 1
   *              <strong>true</strong> if it is ASCII<br />
2694
   *              <strong>false</strong> otherwise
2695
   *              </p>
2696
   */
2697 3
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // A single byte in the range 0x80-0xFF means the string is not 7-bit clean.
    return preg_match('/[\x80-\xFF]/', $str) ? false : true;
  }
2707
2708
  /**
2709
   * Returns true if the string is base64 encoded, false otherwise.
2710
   *
2711
   * @param string $str <p>The input string.</p>
2712 43
   *
2713
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2714 43
   */
2715
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never treated as base64.
    if ($str === '') {
      return false;
    }

    // Strict decoding yields false as soon as the input cannot be decoded.
    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Only input that survives a decode/encode round trip unchanged qualifies.
    return base64_encode($decoded) === $str;
  }
2729
2730 41
  /**
2731
   * Check if the input is binary (heuristic; this looks like a hack).
2732
   *
2733
   * @param mixed $input
2734
   *
2735
   * @return bool
2736
   */
2737
  public static function is_binary($input)
  {
    $byteCount = strlen($input);

    // 1) A non-empty string made up solely of the characters "0" and "1".
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // 2) At least one NUL byte.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // 3) Ratio test, skipped for empty input.
    // NOTE(review): substr_count() receives the literal needle '^ -~', not a
    // character class - presumably "non-printable bytes" was intended. Kept
    // as-is because callers depend on the current results; confirm before changing.
    if ($byteCount) {
      return substr_count($input, '^ -~') / $byteCount > 0.3;
    }

    return false;
  }
2754 36
2755 41
  /**
2756
   * Check if the file is binary.
2757 34
   *
2758 34
   * @param string $file
2759 34
   *
2760 34
   * @return boolean
2761 39
   */
2762
  public static function is_binary_file($file)
  {
    // Default used whenever the file cannot be opened or read.
    $block = '';

    try {
      $fp = fopen($file, 'r');

      // fopen() reports failure by returning false, not by throwing, so the
      // handle has to be checked before it is handed to fread()/fclose().
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $chunk = fread($fp, 512);
        fclose($fp);

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Kept for setups where an error handler turns warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block);
  }
2774
2775
  /**
2776
   * Checks if the given string is equal to any "Byte Order Mark".
2777
   *
2778
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2779
   *
2780
   * @param string $str <p>The input string.</p>
2781
   *
2782 3
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2783 3
   */
2784 3
  public static function is_bom($str)
  {
    // The known BOM byte sequences are the keys of self::$bom; the input has
    // to be strictly identical to one of them.
    return in_array($str, array_keys(self::$bom), true);
  }
2794
2795
  /**
2796 5
   * Check if the string contains any html-tags <lall>.
2797
   *
2798 41
   * @param string $str <p>The input string.</p>
2799
   *
2800
   * @return boolean
2801 36
   */
2802
  public static function is_html($str)
  {
    $str = (string)$str;

    // Nothing to find in an empty string.
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, optionally with attributes.
    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    return count($found) > 0;
  }
2821
2822 33
  /**
2823
   * Try to check if "$str" is a JSON string.
2824 33
   *
2825 33
   * @param string $str <p>The input string.</p>
2826 5
   *
2827
   * @return bool
2828
   */
2829 33
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is never treated as JSON.
    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // Both JSON objects and JSON arrays are accepted (arrays used to be
    // rejected); bare scalars and null still are not. The decoder's error
    // state is only consulted once the decoded type qualifies.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
2847
2848
  /**
2849
   * Check if the string is UTF-16.
2850
   *
2851
   * @param string $str <p>The input string.</p>
2852
   *
2853
   * @return int|false <p>
2854
   *                   <strong>false</strong> if it's not UTF-16,<br />
2855
   *                   <strong>1</strong> for UTF-16LE,<br />
2856
   *                   <strong>2</strong> for UTF-16BE.
2857
   *                   </p>
2858
   */
2859 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // One score per byte order, evaluated in the order LE, BE.
    $score = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: encode back and decode again; the result has to be stable.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Score one point for every key of count_chars($roundTrip) that is
      // strictly contained in the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-16".
    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    return ($score['UTF-16LE'] > $score['UTF-16BE']) ? 1 : 2;
  }
2907
2908
  /**
2909
   * Check if the string is UTF-32.
2910
   *
2911
   * @param string $str
2912
   *
2913
   * @return int|false <p>
2914
   *                   <strong>false</strong> if it's not UTF-32,<br />
2915
   *                   <strong>1</strong> for UTF-32LE,<br />
2916
   *                   <strong>2</strong> for UTF-32BE.
2917
   *                   </p>
2918
   */
2919 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // One score per byte order, evaluated in the order LE, BE.
    $score = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: encode back and decode again; the result has to be stable.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Score one point for every key of count_chars($roundTrip) that is
      // strictly contained in the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-32".
    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    return ($score['UTF-32LE'] > $score['UTF-32BE']) ? 1 : 2;
  }
2967 24
2968
  /**
2969 24
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2970 2
   *
2971
   * @see    http://hsivonen.iki.fi/php-utf8/
2972
   *
2973
   * @param string $str    <p>The string to be checked.</p>
2974 23
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2975 2
   *
2976
   * @return bool
2977
   */
2978 23
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is valid UTF-8.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: anything detected as UTF-16 or UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // The octet-by-octet check below does not rely on PCRE, so it is used
    // unconditionally. (The former /u regex shortcut was only taken when
    // pcre_utf8_support() was NOT true, i.e. exactly when it could not work.)

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Expecting either a US-ASCII octet or the first octet of a sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of a 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of a 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of a 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of a 5 octet sequence: always illegal. Instead of
          // resynchronizing, the sequence is consumed and then rejected by
          // the "4 < $mBytes" check once it is complete.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of a 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is legal here.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Merge the six payload bits at the position this octet occupies.
      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 holds the final code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the input ended in the middle of a multi-octet
    // sequence; such a truncated string is not valid UTF-8.
    return $mState === 0;
  }
3112 4
3113 4
  /**
3114
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3115 4
   * Decodes a JSON string
3116
   *
3117 4
   * @link http://php.net/manual/en/function.json-decode.php
3118
   *
3119
   * @param string $json    <p>
3120
   *                        The <i>json</i> string being decoded.
3121
   *                        </p>
3122
   *                        <p>
3123
   *                        This function only works with UTF-8 encoded strings.
3124
   *                        </p>
3125
   *                        <p>PHP implements a superset of
3126
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3127 13
   *                        only supports these values when they are nested inside an array or an object.
3128
   *                        </p>
3129 13
   * @param bool   $assoc   [optional] <p>
3130 13
   *                        When <b>TRUE</b>, returned objects will be converted into
3131
   *                        associative arrays.
3132 13
   *                        </p>
3133 1
   * @param int    $depth   [optional] <p>
3134 1
   *                        User specified recursion depth.
3135 1
   *                        </p>
3136
   * @param int    $options [optional] <p>
3137 13
   *                        Bitmask of JSON decode options. Currently only
3138
   *                        <b>JSON_BIGINT_AS_STRING</b>
3139
   *                        is supported (default is to cast large integers as floats)
3140
   *                        </p>
3141
   *
3142
   * @return mixed the value encoded in <i>json</i> in appropriate
3143
   * PHP type. Values true, false and
3144
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3145
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3146
   * <i>json</i> cannot be decoded or if the encoded
3147
   * data is deeper than the recursion limit.
3148
   */
3149
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input always passes through self::filter() before decoding.
    $filtered = self::filter($json);

    // $options is only forwarded when Bootup::is_php('5.4') reports true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3161 2
3162
  /**
3163 1
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3164 1
   * Returns the JSON representation of a value.
3165
   *
3166 2
   * @link http://php.net/manual/en/function.json-encode.php
3167 2
   *
3168
   * @param mixed $value   <p>
3169 18
   *                       The <i>value</i> being encoded. Can be any type except
3170 18
   *                       a resource.
3171 1
   *                       </p>
3172 1
   *                       <p>
3173
   *                       All string data must be UTF-8 encoded.
3174 18
   *                       </p>
3175 18
   *                       <p>PHP implements a superset of
3176
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3177 18
   *                       only supports these values when they are nested inside an array or an object.
3178
   *                       </p>
3179
   * @param int   $options [optional] <p>
3180
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3181
   *                       <b>JSON_HEX_TAG</b>,
3182
   *                       <b>JSON_HEX_AMP</b>,
3183
   *                       <b>JSON_HEX_APOS</b>,
3184
   *                       <b>JSON_NUMERIC_CHECK</b>,
3185
   *                       <b>JSON_PRETTY_PRINT</b>,
3186
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3187
   *                       <b>JSON_FORCE_OBJECT</b>,
3188
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3189
   *                       constants is described on
3190
   *                       the JSON constants page.
3191
   *                       </p>
3192
   * @param int   $depth   [optional] <p>
3193
   *                       Set the maximum depth. Must be greater than zero.
3194
   *                       </p>
3195
   *
3196
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3197
   */
3198
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // Run the value through UTF8::filter() before encoding it.
    $filtered = self::filter($value);

    // The "$depth" argument is only forwarded when Bootup::is_php('5.5') is truthy.
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
3210
3211
  /**
3212
   * Makes string's first char lowercase.
3213
   *
3214
   * @param string $str <p>The input string</p>
3215
   *
3216
   * @return string <p>The resulting string</p>
3217
   */
3218
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // Nothing to lowercase in an empty string.
    if (!isset($str[0])) {
      return '';
    }

    // UTF8::substr() is reported to be able to return false; the casts make sure
    // that strtolower() and the concatenation below always work on strings.
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3222
3223
  /**
3224
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3225
   *
3226
   * @param string $str   <p>The string to be trimmed</p>
3227
   * @param string $chars <p>Optional characters to be stripped</p>
3228
   *
3229
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3230 17
   */
3231 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Without an explicit char list, leading Unicode separators (\pZ) and "other" chars (\pC) are stripped.
    // The string "0" is falsy in PHP but is a perfectly valid char list, so it must not
    // be mistaken for "no char list given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3246
3247
  /**
3248 16
   * Returns the UTF-8 character with the maximum code point in the given data.
3249 16
   *
3250 15
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3251
   *
3252
   * @return string <p>The character with the highest code point in the given data.</p>
3253 9
   */
3254 9 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is treated like one concatenated string.
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // The native max() must not be called with an empty list
    // (it raises a warning on older PHP versions and throws a ValueError on PHP 8).
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3262 4
3263
  /**
3264
   * Calculates and returns the maximum number of bytes taken by any
3265 9
   * UTF-8 encoded character in the given string.
3266 5
   *
3267
   * @param string $str <p>The original Unicode string.</p>
3268
   *
3269 9
   * @return int <p>Max byte lengths of the given chars.</p>
3270
   */
3271
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // No entries => report a width of 0.
    if (count($sizes) <= 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3280
3281
  /**
3282
   * Checks whether mbstring is available on the server.
3283
   *
3284
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3285 1
   */
3286
  public static function mbstring_loaded()
  {
    $isLoaded = extension_loaded('mbstring');

    if ($isLoaded) {
      // Side effect: the internal encoding of mbstring is switched to UTF-8.
      \mb_internal_encoding('UTF-8');
    }

    return $isLoaded;
  }
3296
3297
  /**
3298
   * Returns the UTF-8 character with the minimum code point in the given data.
3299
   *
3300
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3301
   *
3302
   * @return string <p>The character with the lowest code point in the given data.</p>
3303 41
   */
3304 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is treated like one concatenated string.
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // The native min() must not be called with an empty list
    // (it raises a warning on older PHP versions and throws a ValueError on PHP 8).
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3312
3313
  /**
3314
   * alias for "UTF8::normalize_encoding()"
3315
   *
3316
   * @see UTF8::normalize_encoding()
3317 1
   *
3318
   * @param string $encoding
3319 1
   *
3320 1
   * @return string
3321
   *
3322
   * @deprecated
3323 1
   */
3324 1
  public static function normalizeEncoding($encoding)
  {
    // Deprecated camelCase alias: delegates to UTF8::normalize_encoding().
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3328 1
3329
  /**
3330
   * Normalize the encoding-"name" input.
3331 1
   *
3332
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3333
   *
3334
   * @return string|false <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.; <strong>false</strong> if the given encoding is empty.</p>
3335 1
   */
3336 1
  public static function normalize_encoding($encoding)
  {
    static $staticNormalizeEncodingCache = array();

    // An empty / falsy encoding name cannot be normalized; false is returned in that case.
    if (!$encoding) {
      return false;
    }

    // Fast paths: the canonical "UTF-8" spelling or a name listed in self::$iconvEncoding.
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    // Results of earlier calls are kept per original input spelling.
    if (isset($staticNormalizeEncodingCache[$encoding])) {
      return $staticNormalizeEncodingCache[$encoding];
    }

    // Known aliases, keyed by the upper-cased name without punctuation.
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upperCased = strtoupper($encoding);

    // Lookup key: everything except ASCII letters, digits and whitespace is removed.
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    // Unknown names fall back to the upper-cased input.
    $normalized = isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upperCased;

    $staticNormalizeEncodingCache[$encoding] = $normalized;

    return $normalized;
  }
3385 10
3386
  /**
3387
   * Normalize some MS Word special characters.
3388
   *
3389
   * @param string $str <p>The string to be normalized.</p>
3390
   *
3391
   * @return string
3392
   */
3393 View Code Duplication
  public static function normalize_msword($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // The self::$utf8MSWord map is split into search / replace lists once
    // and then reused by all later calls.
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if (null === $UTF8_MSWORD_KEYS_CACHE) {
      $msWordMap = self::$utf8MSWord;
      $UTF8_MSWORD_KEYS_CACHE = array_keys($msWordMap);
      $UTF8_MSWORD_VALUES_CACHE = array_values($msWordMap);
    }

    return str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3412
3413
  /**
3414
   * Normalize the whitespace.
3415
   *
3416
   * @param string $str                     <p>The string to be normalized.</p>
3417
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3418
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3419
   *                                        bidirectional text chars.</p>
3420
   *
3421
   * @return string
3422
   */
3423
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // The non-breaking space is only kept for a strict boolean true. The cache key has to be
    // derived from exactly that condition: with a plain (int) cast a truthy non-bool value
    // (e.g. 1) would store the complete table under key 1 and break later calls passing true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      // Only the values (the whitespace characters themselves) are needed for str_replace().
      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      // Bidirectional control chars are removed completely ...
      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    // ... while every whitespace variant is replaced by a plain space.
    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3459 45
3460
  /**
3461 45
   * Format a number with grouped thousands.
3462 45
   *
3463 45
   * @param float  $number
3464
   * @param int    $decimals
3465 45
   * @param string $dec_point
3466
   * @param string $thousands_sep
3467
   *
3468
   * @return string
3469
   *
3470
   * @deprecated Because this has nothing to do with UTF8. :/
3471
   */
3472
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // Only PHP < 5.4 needs a workaround: its native number_format() does not support separators
    // longer than one byte. As soon as one of the two separators is longer, the number is
    // formatted with the ASCII defaults first and the separators are swapped in afterwards.
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        !Bootup::is_php('5.4')
    ) {
      // strtr() replaces in a single pass, so a separator that itself contains "." or ","
      // is never replaced a second time (a sequential str_replace() would do that).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3497
3498
  /**
3499
   * Calculates Unicode code point of the given UTF-8 encoded character.
3500
   *
3501
   * INFO: opposite to UTF8::chr()
3502 52
   *
3503
   * @param string      $chr      <p>The character of which to calculate code point.</p>
3504 52
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
3505
   *
3506 52
   * @return int <p>
3507
   *             Unicode code point of the given character,<br />
3508 52
   *             0 on invalid UTF-8 byte sequence.
3509 40
   *             </p>
3510
   */
3511
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // Empty input has no code point; the string "0" is falsy but valid.
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);

      // normalize_encoding() returns false for an empty encoding (e.g. null). In that case the
      // input is used as it is, instead of passing an invalid encoding to mb_convert_encoding().
      if ($encoding) {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer "IntlChar" when available; a falsy result falls through to the manual decoding.
    if (self::$support['intlChar'] === true) {
      $tmpReturn = \IntlChar::ord($chr);
      if ($tmpReturn) {
        return $tmpReturn;
      }
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    // Decode at most the first four bytes (1-based list of byte values).
    $bytes = unpack('C*', substr($chr, 0, 4));
    $code = $bytes ? $bytes[1] : 0;

    // 4-byte sequence: lead byte >= 0xF0
    if (0xF0 <= $code && isset($bytes[4])) {
      return $cache[$chr] = (($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence: lead byte >= 0xE0
    if (0xE0 <= $code && isset($bytes[3])) {
      return $cache[$chr] = (($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence: lead byte >= 0xC0
    if (0xC0 <= $code && isset($bytes[2])) {
      return $cache[$chr] = (($code - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // Single byte (or an incomplete sequence): the value of the first byte.
    return $cache[$chr] = $code;
  }
3558
3559
  /**
3560 1
   * Parses the string into an array (into the second parameter).
3561
   *
3562 1
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3563 1
   *          if the second parameter is not set!
3564
   *
3565
   * @link http://php.net/manual/en/function.parse-str.php
3566
   *
3567
   * @param string  $str       <p>The input string.</p>
3568 1
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
3569 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3570 1
   *
3571 1
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3572
   */
3573
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $parsed = \mb_parse_str($str, $result);

    // Success requires both: mb_parse_str() did not fail and "$result" is not empty.
    return $parsed !== false && !empty($result);
  }
3586 36
3587
  /**
3588 36
   * Checks if the "u" pattern modifier is available, which enables Unicode (UTF-8) support in PCRE.
3589
   *
3590 36
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3591 2
   */
3592
  public static function pcre_utf8_support()
  {
    // Probe: match an empty pattern that uses the "u" modifier; errors are silenced on purpose.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probeResult = @preg_match('//u', '');

    return (bool)$probeResult;
  }
3597
3598 36
  /**
3599
   * Create an array containing a range of UTF-8 characters.
3600
   *
3601
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3602 36
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3603
   *
3604 36
   * @return array
3605 6
   */
3606 6
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // Boundary resolution order: decimal digits, then hexadecimal digits, otherwise a character.
    $start = ctype_digit((string)$var1)
        ? (int)$var1
        : (ctype_xdigit($var1) ? (int)self::hex_to_int($var1) : self::ord($var1));

    if (!$start) {
      return array();
    }

    $end = ctype_digit((string)$var2)
        ? (int)$var2
        : (ctype_xdigit($var2) ? (int)self::hex_to_int($var2) : self::ord($var2));

    if (!$end) {
      return array();
    }

    // Convert every code point of the numeric range into its character via UTF8::chr().
    $codePoints = range($start, $end);
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, $codePoints);
  }
3644
3645
  /**
3646 36
   * Multi decode html entity & fix urlencoded-win1252-chars.
3647 5
   *
3648
   * e.g:
3649 5
   * 'test+test'                     => 'test+test'
3650 5
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3651
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3652
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3653 36
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3654
   * 'Düsseldorf'                   => 'Düsseldorf'
3655
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3656
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3657 36
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3658
   *
3659
   * @param string $str          <p>The input string.</p>
3660
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3661
   *
3662
   * @return string
3663
   */
3664 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into numeric HTML entities,
    // which are decoded by the loop below.
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', rawurldecode($str));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // Decode once, or (with "$multi_decode") until a pass no longer changes the string.
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3695
3696
  /**
3697
   * alias for "UTF8::remove_bom()"
3698
   *
3699
   * @see UTF8::remove_bom()
3700 12
   *
3701 9
   * @param string $str
3702
   *
3703
   * @return string
3704
   *
3705
   * @deprecated
3706
   */
3707
  public static function removeBOM($str)
  {
    // Deprecated camelCase alias: delegates to UTF8::remove_bom().
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3711 6
3712 6
  /**
3713 6
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
3714 6
   *
3715 6
   * @param string $str <p>The input string.</p>
3716 6
   *
3717 6
   * @return string <p>String without UTF-BOM</p>
3718 6
   */
3719 6
  public static function remove_bom($str)
  {
    // self::$bom is iterated as "BOM byte string => BOM length in bytes".
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // substr() returns false on older PHP versions if the string consists of the BOM only;
        // the cast keeps the result an (empty) string in that case.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3729 6
3730 6
  /**
3731
   * Removes duplicate occurrences of a string in another string.
3732 6
   *
3733 6
   * @param string          $str  <p>The base string.</p>
3734 6
   * @param string|string[] $what <p>String to search for in the base string.</p>
3735
   *
3736
   * @return string <p>The result string with removed duplicates.</p>
3737
   */
3738
  public static function remove_duplicates($str, $what = ' ')
  {
    // A single search string is handled like a list with one entry.
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Collapse every run of consecutive "$item" occurrences into a single one.
        // A callback is used for the replacement, because a plain replacement string would
        // interpret sequences like "$0" or "\1" inside of "$item" as back-references.
        $str = preg_replace_callback(
            '/(' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3753
3754
  /**
3755
   * Remove invisible characters from a string.
3756
   *
3757
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
3758
   *
3759
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
3760
   *
3761
   * @param string $str
3762
   * @param bool   $url_encoded
3763
   * @param string $replacement
3764
   *
3765
   * @return string
3766
   */
3767
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // The "i" modifier is required: percent-encoding is valid in upper- and lower-case ("%0B" === "%0b").
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is replaced anymore, so that sequences which only
    // become visible after a removal are caught too.
    do {
      $previous = $str;
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);

      // Safety net: a "$replacement" that itself matches one of the patterns
      // would otherwise keep this loop running forever.
      if ($str === $previous) {
        break;
      }
    } while ($count !== 0);

    return $str;
  }
3787 13
3788
  /**
3789 14
   * Replace the diamond question mark (�) with the replacement.
3790
   *
3791 14
   * @param string $str <p>The input string</p>
3792 14
   * @param string $unknown <p>The replacement character.</p>
3793
   *
3794 14
   * @return string
3795
   */
3796
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // mb_substitute_character() takes the keyword "none" instead of an empty string.
    $unknownHelper = $unknown;
    if ($unknown === '') {
      $unknownHelper = 'none';
    }

    // Make sure that the support table is initialized before it is read
    // (same guard as in the other methods that access self::$support).
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false) {
      trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
    }

    // Temporarily switch the substitute character, re-encode UTF-8 => UTF-8
    // and restore the previous setting afterwards.
    $save = \mb_substitute_character();
    \mb_substitute_character($unknownHelper);
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
    \mb_substitute_character($save);

    // Replace the remaining replacement characters (byte sequence and literal form).
    return str_replace(
        array(
            "\xEF\xBF\xBD",
            '�',
        ),
        array(
            $unknown,
            $unknown,
        ),
        $str
    );
  }
3831
3832 1
  /**
3833
   * Strip whitespace or other characters from end of a UTF-8 string.
3834
   *
3835 1
   * @param string $str   <p>The string to be trimmed.</p>
3836
   * @param string $chars <p>Optional characters to be stripped.</p>
3837
   *
3838
   * @return string <p>The string with unwanted characters stripped from the right.</p>
3839
   */
3840 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Without an explicit char list, trailing Unicode separators (\pZ) and "other" chars (\pC) are stripped.
    // The string "0" is falsy in PHP but is a perfectly valid char list, so it must not
    // be mistaken for "no char list given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3855
3856 2
  /**
3857 2
   * rxClass
3858
   *
3859 2
   * @param string $s
3860
   * @param string $class
3861 2
   *
3862 2
   * @return string
3863
   */
3864 2
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // $parts[0] collects the content of the bracket expression,
    // every further entry becomes an own alternative of the final pattern.
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // A hyphen is moved to the front of the bracket expression.
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // At most two bytes: append in quoted form.
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // Three or more bytes, but still one single character: append as it is.
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts))
        ? $parts[0]
        : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3904
3905
  /**
3906
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
3907
   */
3908
  public static function showSupport()
  {
    // make sure the support-table was filled before it is printed
    $alreadyChecked = isset(self::$support['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    // print one entry per line, separated for plain-text and HTML output
    foreach (self::$support as $supportEntry) {
      echo $supportEntry, "\n<br>";
    }
  }
3918
3919
  /**
3920
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
3921
   *
3922
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
3923
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
3924
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
3925
   *
3926
   * @return string <p>The HTML numbered entity.</p>
3927
   */
3928
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    // nothing to encode
    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched, if the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // every encoding name except the literal "UTF-8" is normalized first
    $usedEncoding = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return '&#' . self::ord($char, $usedEncoding) . ';';
  }
3951
3952
  /**
3953
   * Convert a string to an array of Unicode characters.
3954
   *
3955
   * @param string  $str       <p>The string to split into array.</p>
3956
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
3957
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3958
   *
3959 1
   * @return string[] <p>An array containing chunks of the string.</p>
3960
   */
3961 1
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $chars = array();

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['pcre_utf8'] === true) {

      // PCRE with UTF-8 support: one regex match per character
      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group them by the length announced in the lead byte;
      // a sequence is only collected when all of its continuation bytes look like 10xxxxxx
      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {
          // 0xxxxxxx: single byte
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // 110xxxxx: two bytes
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // 1110xxxx: three bytes
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // 11110xxx: four bytes
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }
      }
    }

    // glue the single characters together into chunks of "$length" characters
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $chunk) {
        $chunks[] = implode('', $chunk);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4038
4039
  /**
4040
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4041
   *
4042
   * @param string $str <p>The input string.</p>
4043
   *
4044
   * @return false|string <p>
4045 1
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4046
   *                      otherwise it will return false.
4047 1
   *                      </p>
4048
   */
4049 1
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is evaluated only once and its result is reused
      // (result 1 is mapped to little endian, result 2 to big endian)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    // strict mode (third argument) is used for the detection
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the string from this encoding into
    // the same encoding gives back a string with the same md5-hash as the input.

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4131 9
4132
  /**
4133
   * Check if the string ends with the given substring.
4134
   *
4135
   * @param string $haystack <p>The string to search in.</p>
4136
   * @param string $needle   <p>The substring to search for.</p>
4137
   *
4138
   * @return bool
4139
   */
4140 View Code Duplication
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end of the haystack as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4155
4156
  /**
4157
   * Check if the string ends with the given substring, case insensitive.
4158
   *
4159
   * @param string $haystack <p>The string to search in.</p>
4160
   * @param string $needle   <p>The substring to search for.</p>
4161
   *
4162
   * @return bool
4163
   */
4164 12 View Code Duplication
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many characters from the end of the haystack as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    // substr() is reported to be able to return false: handle that explicitly,
    // instead of handing a non-string over to the comparison
    if ($tail === false) {
      return false;
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
4179
4180
  /**
4181
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4182 9
   *
4183
   * @link  http://php.net/manual/en/function.str-ireplace.php
4184 9
   *
4185 1
   * @param mixed $search  <p>
4186
   *                       Every replacement with search array is
4187
   *                       performed on the result of previous replacement.
4188 8
   *                       </p>
4189 2
   * @param mixed $replace <p>
4190 2
   *                       </p>
4191
   * @param mixed $subject <p>
4192 8
   *                       If subject is an array, then the search and
4193 8
   *                       replace is performed with every entry of
4194 1
   *                       subject, and the return value is an array as
4195
   *                       well.
4196
   *                       </p>
4197 7
   * @param int   $count   [optional] <p>
4198
   *                       The number of matched and replaced needles will
4199 7
   *                       be returned in count which is passed by
4200
   *                       reference.
4201
   *                       </p>
4202 1
   *
4203
   * @return mixed <p>A string or an array of replacements.</p>
4204
   */
4205
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a case-insensitive UTF-8 pattern.
    // (built as a new list, so that no by-reference loop variable is left behind)
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must never match: "start of string" preceded by a char is impossible
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // The replacement has to be taken literally, like "str_ireplace()" does it.
    // Without escaping, preg_replace() would read "$1", "${1}" or "\1" as back-references.
    $escapeReplacement = function ($value) {
      return strtr((string)$value, array('\\' => '\\\\', '$' => '\\$'));
    };

    if (is_array($replace)) {
      $replacement = array_map($escapeReplacement, $replace);
    } else {
      $replacement = $escapeReplacement($replace);
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4223
4224
  /**
4225
   * Check if the string starts with the given substring, case insensitive.
4226
   *
4227
   * @param string $haystack <p>The string to search in.</p>
4228
   * @param string $needle   <p>The substring to search for.</p>
4229
   *
4230
   * @return bool
4231
   */
4232 2 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle has to be found exactly at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4247
4248
  /**
4249
   * Limit the number of characters in a string, but also after the next word.
4250
   *
4251
   * @param string $str
4252 3
   * @param int    $length
4253
   * @param string $strAddOn
4254 3
   *
4255 3
   * @return string
4256 3
   */
4257
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to cut
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the last allowed character is a space: cut right before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise cut at the limit and drop the last (possibly incomplete) word
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $wholeWords = implode(' ', $words);

    // no complete word is left: fall back to a hard cut
    if ($wholeWords === '') {
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $wholeWords . $strAddOn;
  }
4288
4289
  /**
4290
   * Pad a UTF-8 string to given length with another string.
4291
   *
4292
   * @param string $str        <p>The input string.</p>
4293
   * @param int    $pad_length <p>The length of return string.</p>
4294
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4295
   * @param int    $pad_type   [optional] <p>
4296
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4297
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4298
   *                           </p>
4299
   *
4300
   * @return string <strong>Returns the padded string</strong>
4301
   */
4302
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    // only a positive integer target length, that is not shorter than the input, is padded
    $shouldPad = is_int($pad_length) === true
                 &&
                 $pad_length > 0
                 &&
                 $pad_length >= $str_length;

    if ($shouldPad === false) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string can not fill anything and would cause a division by zero below
    if (!$ps_length) {
      return $str;
    }

    // number of characters that have to be added
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the rounded down half, the right side the rounded up half
        $repeats = (int)ceil($diff / $ps_length / 2);
        $pre = str_repeat($pad_string, $repeats);
        // the cast has to cover the whole division: "(int)$diff / 2" casts first
        // and then hands a float like 1.5 over as length
        $pre = self::substr($pre, 0, (int)($diff / 2));
        $post = str_repeat($pad_string, $repeats);
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4343
4344
  /**
4345
   * Repeat a string.
4346
   *
4347
   * @param string $str        <p>
4348
   *                           The string to be repeated.
4349
   *                           </p>
4350
   * @param int    $multiplier <p>
4351
   *                           Number of time the input string should be
4352
   *                           repeated.
4353
   *                           </p>
4354
   *                           <p>
4355 8
   *                           multiplier has to be greater than or equal to 0.
4356
   *                           If the multiplier is set to 0, the function
4357 8
   *                           will return an empty string.
4358 2
   *                           </p>
4359
   *
4360
   * @return string <p>The repeated string.</p>
4361 6
   */
4362
  public static function str_repeat($str, $multiplier)
  {
    // the input is passed through self::filter() first, the repetition itself is native
    return str_repeat(self::filter($str), $multiplier);
  }
4368
4369
  /**
4370
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
4371
   *
4372 6
   * Replace all occurrences of the search string with the replacement string
4373
   *
4374
   * @link http://php.net/manual/en/function.str-replace.php
4375
   *
4376
   * @param mixed $search  <p>
4377
   *                       The value being searched for, otherwise known as the needle.
4378
   *                       An array may be used to designate multiple needles.
4379
   *                       </p>
4380
   * @param mixed $replace <p>
4381
   *                       The replacement value that replaces found search
4382
   *                       values. An array may be used to designate multiple replacements.
4383
   *                       </p>
4384
   * @param mixed $subject <p>
4385
   *                       The string or array being searched and replaced on,
4386
   *                       otherwise known as the haystack.
4387 62
   *                       </p>
4388
   *                       <p>
4389 62
   *                       If subject is an array, then the search and
4390
   *                       replace is performed with every entry of
4391 62
   *                       subject, and the return value is an array as
4392 4
   *                       well.
4393
   *                       </p>
4394
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4395
   *
4396
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4397 61
   */
4398 2
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function, "$count" is filled by reference
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4402 2
4403
  /**
4404
   * Shuffles all the characters in the string.
4405
   *
4406 61
   * @param string $str <p>The input string</p>
4407 61
   *
4408 1
   * @return string <p>The shuffled string.</p>
4409
   */
4410
  public static function str_shuffle($str)
  {
    // shuffle whole characters (as returned by self::split()), not single bytes
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4418
4419
  /**
4420
   * Sort all characters according to code points.
4421
   *
4422
   * @param string $str    <p>A UTF-8 string.</p>
4423
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4424
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4425
   *
4426
   * @return string <p>String of sorted characters.</p>
4427
   */
4428
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    // flipping the array twice removes repeated values
    if ($unique) {
      $codePoints = array_flip(array_flip($codePoints));
    }

    // sort by value, ascending unless "$desc" is set
    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    // convert the sorted code points back into a string
    return self::string($codePoints);
  }
4444
4445
  /**
4446
   * Split a string into an array.
4447
   *
4448
   * @param string $str
4449 2
   * @param int    $len
4450
   *
4451 2
   * @return array
4452
   */
4453
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // invalid chunk length: let the native function deal with it
    if ($len < 1) {
      return str_split($str, $len);
    }

    // split into grapheme clusters
    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // join every "$len" consecutive clusters into one element
    $chunks = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4489
4490 2
  /**
4491
   * Check if the string starts with the given substring.
4492
   *
4493
   * @param string $haystack <p>The string to search in.</p>
4494
   * @param string $needle   <p>The substring to search for.</p>
4495
   *
4496
   * @return bool
4497
   */
4498 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle has to be found exactly at position 0
    return self::strpos($haystack, $needle) === 0;
  }
4513 1
4514
  /**
4515 1
   * Get a binary representation of a specific string.
4516
   *
4517
   * @param string $str <p>The input string.</p>
4518
   *
4519
   * @return string
4520
   */
4521
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Convert byte by byte (8 bits each). Converting the whole hex-dump at once via
    // "base_convert()" goes through a float internally and therefore loses precision
    // as soon as the string is longer than a few bytes.
    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // keep the output format of the number conversion:
    // no leading zeros, and "0" for an empty / all-zero input
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4529
4530
  /**
4531
   * Convert a string into an array of words.
4532
   *
4533
   * @param string $str
4534
   * @param string $charlist
4535 15
   *
4536
   * @return array
4537 15
   */
4538 15
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    // an empty input gives a list with one empty part
    if ($str === '') {
      return array('');
    }

    // regex class for "word characters": letters plus the additional chars from "$charlist"
    $wordClass = self::rxClass($charlist, '\pL');

    // the words are kept in the result as captured delimiters, the text between them as the other parts
    return \preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4550
4551
  /**
4552
   * alias for "UTF8::to_ascii()"
4553 14
   *
4554
   * @see UTF8::to_ascii()
4555
   *
4556 2
   * @param string $str
4557 2
   * @param string $unknown
4558 2
   * @param bool   $strict
4559
   *
4560 14
   * @return string
4561
   */
4562
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4566 14
4567 2
  /**
4568 14
   * Counts number of words in the UTF-8 string.
4569 14
   *
4570 14
   * @param string $str      <p>The input string.</p>
4571 1
   * @param int    $format   [optional] <p>
4572
   *                         <strong>0</strong> => return a number of words (default)<br />
4573
   *                         <strong>1</strong> => return an array of words<br />
4574 14
   *                         <strong>2</strong> => return an array of words with word-offset as key
4575 14
   *                         </p>
4576
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
4577
   *
4578
   * @return array|int <p>The number of words in the string</p>
4579
   */
4580
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are at the odd indexes of this list, the text between them at the even indexes
    $strParts = self::str_to_words($str, $charlist);
    $len = count($strParts);

    // default: only the number of words
    if ($format !== 1 && $format !== 2) {
      return ($len - 1) / 2;
    }

    $words = array();

    // format 1: plain list of the words
    if ($format === 1) {
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    // format 2: list of the words, keyed by their character offset in the string
    $offset = self::strlen($strParts[0]);
    for ($i = 1; $i < $len; $i += 2) {
      $words[$offset] = $strParts[$i];
      $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
    }

    return $words;
  }
4610
4611
  /**
4612
   * Case-insensitive string comparison.
4613
   *
4614
   * INFO: Case-insensitive version of UTF8::strcmp()
4615
   *
4616
   * @param string $str1
4617
   * @param string $str2
4618
   *
4619
   * @return int <p>
4620 1
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4621
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4622 1
   *             <strong>0</strong> if they are equal.
4623 1
   *             </p>
4624 1
   */
4625
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both sides, then compare them case-sensitive
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4629
4630
  /**
4631
   * alias for "UTF8::strstr()"
4632
   *
4633 1
   * @see UTF8::strstr()
4634
   *
4635
   * @param string  $haystack
4636
   * @param string  $needle
4637
   * @param bool    $before_needle
4638
   * @param string  $encoding
4639
   * @param boolean $cleanUtf8
4640
   *
4641
   * @return string|false
4642
   */
4643 4
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4647 4
4648 2
  /**
4649
   * Case-sensitive string comparison.
4650
   *
4651 3
   * @param string $str1
4652
   * @param string $str2
4653
   *
4654
   * @return int  <p>
4655
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
4656
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
4657
   *              <strong>0</strong> if they are equal.
4658
   *              </p>
4659
   */
4660
  public static function strcmp($str1, $str2)
  {
    // identical strings do not need any normalization
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    // compare the NFD-normalized forms byte-wise
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4668
4669
  /**
4670
   * Find length of initial segment not matching mask.
4671
   *
4672
   * @param string $str
4673
   * @param string $charList
4674
   * @param int    $offset
4675
   * @param int    $length
4676
   *
4677 1
   * @return int|null
4678
   */
4679 1
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;

    // without a mask there is no result
    if ($charList === '') {
      return null;
    }

    // only cut the string when a non-default offset or length was given
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first char from the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no char from the mask was found: the whole string is the segment
    return self::strlen($str);
  }
4701
4702
  /**
4703
   * alias for "UTF8::stristr()"
4704
   *
4705
   * @see UTF8::stristr()
4706
   *
4707 1
   * @param string  $haystack
4708
   * @param string  $needle
4709 1
   * @param bool    $before_needle
4710
   * @param string  $encoding
4711
   * @param boolean $cleanUtf8
4712
   *
4713
   * @return string|false
4714
   */
4715
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4719
4720
  /**
4721
   * Create a UTF-8 string from code points.
4722
   *
4723
   * INFO: opposite to UTF8::codepoints()
4724
   *
4725
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4726
   *
4727
   * @return string <p>UTF-8 encoded string.</p>
4728
   */
4729 11
  public static function string(array $array)
  {
    // convert every code point via self::chr() and join the results in the given order
    $result = '';
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4742
4743
  /**
4744 10
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4745 10
   *
4746
   * @param string $str <p>The input string.</p>
4747
   *
4748
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4749 10
   */
4750
  public static function string_has_bom($str)
  {
    // the known BOM byte-sequences are the keys of self::$bom
    foreach (array_keys(self::$bom) as $bomString) {
      $startsWithBom = (0 === strpos($str, $bomString));
      if ($startsWithBom) {
        return true;
      }
    }

    return false;
  }
4760
4761
  /**
   * Wrapper around PHP's "strip_tags()" with an optional UTF-8 cleaning step before stripping.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped; handed to "strip_tags()" as-is.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>If truthy, the input is run through UTF8::clean() first.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4789
4790
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack to start searching;
   *                           "null" (the default) is treated as 0.
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // the default is "null", but "\grapheme_stripos" and "\mb_stripos" expect an integer offset
    // (same normalization as in UTF8::strpos())
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4857
4858
  /**
   * Case-insensitive search: returns the part of haystack from the first occurrence of needle
   * to the end, or the part before it.
   *
   * Backends are tried in this order: "mbstring", "intl" (grapheme), then a regex fallback.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found
   *                      (or if haystack / needle is empty).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to search in, or nothing to search for
    if (isset($haystack[0], $needle[0]) === false) {
      return false;
    }

    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (isset(self::$support['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $hasMbstring = self::$support['mbstring'];

    // only "mbstring" is given the $encoding; the other backends below do not receive it
    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // regex fallback: group 1 lazily captures everything in front of the first match
    $matches = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $matches);

    if (isset($matches[1]) === false) {
      return false;
    }

    $prefix = $matches[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
4924 21
4925
  /**
   * Get the string length, not the byte-length!
   *
   * Backends are tried in this order: "iconv" (only for non UTF-8 encodings without "mbstring"),
   * "mbstring", "intl" (grapheme), "iconv", a regex based count and finally the "mb_" polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // for these encodings the byte-length is returned directly
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['iconv'] === true
        &&
        self::$support['mbstring'] === false
    ) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$support['intl'] === true) {
      // INFO: the cleaned string is also what the fallbacks below operate on
      $str = self::clean($str);
      $returnTmp = \grapheme_strlen($str);
      // "grapheme_strlen()" can return "null" or "false" instead of a length,
      // in both cases we try the next fallback instead of returning a non-integer
      if ($returnTmp !== null && $returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$support['iconv'] === true) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill (with the requested encoding)
    return \mb_strlen($str, $encoding);
  }
5021
5022
  /**
   * Case insensitive "natural order" comparison: both inputs are passed through
   * "UTF8::strtocasefold()" and then compared via "UTF8::strnatcmp()".
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5038
5039
  /**
   * "Natural order" string comparison.
   *
   * Inputs with identical string representations short-circuit to 0; otherwise both are passed
   * through "UTF8::strtonatfold()" and compared with PHP's "strnatcmp()".
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // compare the string representations first, so that equal inputs need no folding
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5057
5058
  /**
   * Case-insensitive comparison of the first n characters: both inputs are passed through
   * "UTF8::strtocasefold()" and then compared via "UTF8::strncmp()".
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5075 45
5076 45
  /**
   * String comparison of the first n characters.
   *
   * Both inputs are shortened with "UTF8::substr()" and then compared via "UTF8::strcmp()".
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // "UTF8::substr()" can return "false", but "UTF8::strcmp()" expects strings:
    // the cast turns such a result into an empty string
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5096 43
5097 2
  /**
   * Search a string for any of a set of characters.
   *
   * The character list is turned into a regex character class via "self::rxClass()".
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>The string starting from the character found, or false if it is not found
   *                      (or if haystack / char_list is empty).</p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (isset($haystack[0], $char_list[0]) === false) {
      return false;
    }

    $matches = array();
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $matches)) {
      return false;
    }

    // byte position of the matched character, everything from there on is returned
    $bytePos = strpos($haystack, $matches[0]);

    return substr($haystack, $bytePos);
  }
5122
5123
  /**
   * Find position of first occurrence of string in a string.
   *
   * Backends are tried in this order: "iconv" (only for non UTF-8 encodings without "mbstring"),
   * "mbstring", "intl" (grapheme), "iconv" and finally a vanilla php implementation.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack. A non-negative integer is
   *                              converted into a character via UTF8::chr() first.
   *                              </p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or if haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle
    // INFO: this check must run before the string-cast below, otherwise it can never match
    //       (same order as in UTF8::strripos())
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn only if neither "mbstring" nor "iconv" is available (same rule as in UTF8::strlen())
    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['iconv'] === false
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the string: convert it into the equivalent
      // start position, so that the result stays relative to the beginning of $haystack
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    // "UTF8::substr()" can return "false", the cast turns that into an empty string
    $haystackTail = (string)self::substr($haystack, $offset);

    $bytePos = strpos($haystackTail, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte-position into a character-position
    return $offset + self::strlen(substr($haystackTail, 0, $bytePos));
  }
5244 1
5245
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around "\mb_strrchr()" with optional encoding normalization and UTF-8 cleaning.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use, "UTF-8" by default.
   *                              Any other value is passed through UTF8::normalize_encoding().
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5284 1
5285 1
  /**
   * Reverses characters order in the string.
   *
   * The input is split with "UTF8::split()", the parts are reversed and joined again.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5302
5303 1
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around "\mb_strrichr()" with optional encoding normalization and UTF-8 cleaning.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name to use, "UTF-8" by default.
   *                               Any other value is passed through UTF8::normalize_encoding().
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5341
5342 1
  /**
5343
   * Find position of last occurrence of a case-insensitive string.
5344
   *
5345
   * @param string  $haystack  <p>The string to look in.</p>
5346
   * @param string  $needle    <p>The string to look for.</p>
5347
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5348
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5349
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5350
   *
5351
   * @return int|false <p>
5352
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
5353
   *                   not found, it returns false.
5354 1
   *                   </p>
5355
   */
5356 1
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is treated as a code point and converted via self::chr().
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // An empty haystack or an empty needle can never produce a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strripos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    //
    // Both operands are lower-cased so that the case-sensitive strrpos() performs a
    // case-insensitive comparison. The previous fallback compared strtonatfold() results,
    // which removes accents but keeps the case: it matched "e" against "é" while still
    // distinguishing "A" from "a", contradicting both this method's contract and the
    // \mb_strripos() path above.
    $haystackLower = self::strtolower($haystack, $encoding);
    $needleLower = self::strtolower($needle, $encoding);

    return self::strrpos($haystackLower, $needleLower, $offset, $encoding, $cleanUtf8);
  }
5419 1
5420
  /**
5421 5
   * Find position of last occurrence of a string in a string.
5422 5
   *
5423 5
   * @link http://php.net/manual/en/function.mb-strrpos.php
5424
   *
5425 5
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5426
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
5427 5
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5428 5
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5429
   *                              the end of the string.
5430
   *                              </p>
5431 5
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5432
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5433
   *
5434 5
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5435 5
   *                   is not found, it returns false.</p>
5436 5
   */
5437
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is treated as a code point and converted via self::chr().
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // Normalise the argument types.
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // An empty haystack or an empty needle can never produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Boolean values are accepted as legacy input and mean "UTF-8".
    if (in_array($encoding, array('UTF-8', true, false), true)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Try mbstring first; a "not found" result falls through to the next strategy.
    if (self::$support['mbstring'] === true) {
      $found = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($found !== false) {
        return $found;
      }
    }

    // Then intl, with the same fall-through on "not found".
    if (self::$support['intl'] === true) {
      $found = \grapheme_strrpos($haystack, $needle, $offset);
      if ($found !== false) {
        return $found;
      }
    }

    // fallback via vanilla php

    // Cut the haystack down to the searched range and remember how many
    // leading characters were dropped, so the result can be shifted back.
    $skipped = 0;
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
      $skipped = $offset;
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // Convert the byte position into a character position.
    $charsBefore = self::strlen(substr($haystack, 0, $bytePos));

    return $skipped + $charsBefore;
  }
5514
5515
  /**
5516 1
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5517 1
   * mask.
5518
   *
5519 1
   * @param string $str    <p>The input string.</p>
5520
   * @param string $mask   <p>The mask of chars</p>
5521
   * @param int    $offset [optional]
5522 2
   * @param int    $length [optional]
5523
   *
5524 2
   * @return int
5525 1
   */
5526
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // Normalise the numeric arguments.
    $length = (int)$length;
    $offset = (int)$offset;

    // Only cut a sub-string when the caller actually narrowed the range
    // (2147483647 is the default and stands for "no limit given").
    $rangeGiven = $offset || 2147483647 !== $length;
    if ($rangeGiven) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // Match the longest leading run of characters taken from the mask.
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5543
5544
  /**
5545
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5546
   *
5547
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5548
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5549
   * @param bool    $before_needle [optional] <p>
5550
   *                               If <b>TRUE</b>, strstr() returns the part of the
5551
   *                               haystack before the first occurrence of the needle (excluding the needle).
5552
   *                               </p>
5553
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5554
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5555
   *
5556
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5557
   */
5558
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle can never produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // The "\mb_" and "\iconv_" search functions report wrong positions
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The default charset is used verbatim; any other name is canonicalised first.
    $encoding = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Try mbstring first; a "not found" result falls through to the next strategy.
    if (self::$support['mbstring'] === true) {
      $portion = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($portion !== false) {
        return $portion;
      }
    }

    // Then intl, with the same fall-through on "not found".
    if (self::$support['intl'] === true) {
      $portion = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($portion !== false) {
        return $portion;
      }
    }

    // Last resort: lazily capture everything in front of the first occurrence of the needle.
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $head = $match[1];
    if ($before_needle) {
      return $head;
    }

    // Everything from the needle onwards, i.e. the haystack minus the captured head.
    return self::substr($haystack, self::strlen($head));
  }
5616 8
5617 8
  /**
5618
   * Unicode transformation for case-less matching.
5619
   *
5620 19
   * @link http://unicode.org/reports/tr21/tr21-5.html
5621
   *
5622 9
   * @param string  $str       <p>The input string.</p>
5623 4
   * @param bool    $full      [optional] <p>
5624 4
   *                           <b>true</b>, replace full case folding chars (default)<br />
5625 4
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5626 6
   *                           </p>
5627 6
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5628 6
   *
5629
   * @return string
5630
   */
5631 9
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Search/replace lists derived from self::$commonCaseFold, built once per request.
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // The "caseFolding_full" data set is loaded lazily and kept for later calls;
      // its element 0 is used as the search list and element 1 as the replacement list.
      static $fullFoldTable = null;

      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // The folded string is finally lower-cased.
    return self::strtolower($str);
  }
5668
5669 19
  /**
5670
   * Make a string lowercase.
5671 19
   *
5672
   * @link http://php.net/manual/en/function.mb-strtolower.php
5673
   *
5674
   * @param string  $str       <p>The string being lowercased.</p>
5675
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5676
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5677
   *
5678
   * @return string str with all alphabetic characters converted to lowercase.
5679
   */
5680 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Nothing to convert.
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // Run the input through self::clean() before the "\mb_" function sees it.
      $str = self::clean($str);
    }

    // The default charset is used verbatim; any other name is canonicalised first.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtolower($str, $charset);
  }
5701
5702
  /**
5703
   * Generic case sensitive transformation for collation matching.
5704
   *
5705
   * @param string $str <p>The input string</p>
5706
   *
5707
   * @return string
5708
   */
5709
  private static function strtonatfold($str)
  {
    // Canonical decomposition (NFD) first ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... then every run of non-spacing marks (\p{Mn}) is removed.
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5714 14
5715
  /**
5716
   * Make a string uppercase.
5717
   *
5718
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5719
   *
5720
   * @param string  $str       <p>The string being uppercased.</p>
5721
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5722
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5723
   *
5724
   * @return string str with all alphabetic characters converted to uppercase.
5725
   */
5726 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Nothing to convert.
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // Run the input through self::clean() before the "\mb_" function sees it.
      $str = self::clean($str);
    }

    // The default charset is used verbatim; any other name is canonicalised first.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtoupper($str, $charset);
  }
5746 8
5747 2
  /**
5748
   * Translate characters or replace sub-strings.
5749
   *
5750 7
   * @link  http://php.net/manual/en/function.strtr.php
5751 7
   *
5752 7
   * @param string          $str  <p>The string being translated.</p>
5753
   * @param string|string[] $from <p>The string replacing from.</p>
5754 7
   * @param string|string[] $to   <p>The string being translated to.</p>
5755 1
   *
5756 1
   * @return string <p>
5757 7
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5758
   *                corresponding character in to.
5759
   *                </p>
5760 7
   */
5761
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the "no third argument" marker: in that case $from is handed to the
    // native strtr() as-is, i.e. it is expected to be a replacement map (array).
    if (INF !== $to) {
      // Only strings have to be broken into single characters. Arrays already list
      // the pieces and must not be passed to str_split(), which expects a string.
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      // Pair the lists up to the length of the shorter one; surplus entries are ignored.
      $pairs = min(count($from), count($to));
      if ($pairs === 0) {
        // Nothing to translate; also avoids calling array_combine() on empty arrays.
        return (string)$str;
      }

      $from = array_combine(
          array_slice($from, 0, $pairs),
          array_slice($to, 0, $pairs)
      );
    }

    return strtr($str, $from);
  }
5780
5781 7
  /**
5782
   * Return the width of a string.
5783
   *
5784
   * @param string  $str       <p>The input string.</p>
5785
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5786
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5787
   *
5788
   * @return int
5789
   */
5790
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // The default charset is used verbatim; any other name is canonicalised first.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    $width = \mb_strwidth($str, $charset);

    return $width;
  }
5805 1
5806 1
  /**
5807
   * Get part of a string.
5808
   *
5809 1
   * @link http://php.net/manual/en/function.mb-substr.php
5810
   *
5811 1
   * @param string  $str       <p>The string being checked.</p>
5812
   * @param int     $start     <p>The first position used in str.</p>
5813 1
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
5814 1
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5815 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5816 1
   *
5817
   * @return string <p>Returns a sub-string specified by the start and length parameters.</p>
5818 1
   */
5819 1
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Nothing to extract from an empty string.
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // The character count is only needed to range-check $start or to default $length.
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    // NOTE: a start position behind the end of the string yields false, not ''.
    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // Boolean values are accepted as legacy input and mean "UTF-8".
    if (in_array($encoding, array('UTF-8', true, false), true)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Strategies in order of preference: mbstring, iconv, intl, pure PHP.
    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    $slice = array_slice($chars, $start, $length);

    return implode('', $slice);
  }
5895
5896
  /**
5897
   * Binary safe comparison of two strings from an offset, up to length characters.
5898
   *
5899
   * @param string  $main_str           <p>The main string being compared.</p>
5900
   * @param string  $str                <p>The secondary string being compared.</p>
5901
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5902
   *                                    the end of the string.</p>
5903
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5904
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5905
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5906
   *                                    insensitive.</p>
5907
   *
5908
   * @return int
5909
   */
5910
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
5911
  {
5912
    $main_str = self::substr($main_str, $offset, $length);
5913
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5912 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5914
5915
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5912 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5913 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5912 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5913 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5916
  }
5917
5918
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If it is omitted (null) while an offset is given, the
   *                           search runs from the offset to the end of the haystack.
   *                           </p>
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The number of occurrences, or false if the haystack or the needle is empty,
   *                   or (on PHP < 7.1 only) if "$length + $offset" is not positive.
   *                   </p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // this range-check is only applied on PHP < 7.1 (a missing length counts as 0 here)
      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // Keep a missing length as "null": casting it to 0 would cut the haystack
      // down to an empty string, so that an offset without a length always counted 0.
      if ($length !== null) {
        $length = (int)$length;
      }

      // "self::substr()" may return false, normalize that to an empty string
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle in UTF-8 mode
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
5992
5993
  /**
   * Strips a leading $needle from $haystack, the comparison is case insensitive.
   *
   * @param string $haystack <p>The string to remove the prefix from.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the prefix, or the unchanged haystack
   *                if the needle is empty or isn't a prefix of it.
   *                </p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // there is nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the input untouched
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
6021
6022
  /**
   * Strips a trailing $needle from $haystack, the comparison is case insensitive.
   *
   * @param string $haystack <p>The string to remove the suffix from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the suffix, or the unchanged haystack
   *                if the needle is empty or isn't a suffix of it.
   *                </p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // there is nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the input untouched
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6050
6051
  /**
   * Strips a leading $needle from $haystack (case sensitive).
   *
   * @param string $haystack <p>The string to remove the prefix from.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the prefix, or the unchanged haystack
   *                if the needle is empty or isn't a prefix of it.
   *                </p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // there is nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the input untouched
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
6079
6080
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of stings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of stings.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // nothing to do (and "array_fill()" rejects a count of zero on old PHP versions)
      if ($num === 0) {
        return array();
      }

      // $replacement: one replacement per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one offset per input string, non-integer offsets fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one length per input string
      if (!isset($length)) {
        // A missing length means "replace up to the end of the string": hand "null" to the
        // recursive call (as in the single-string case) instead of 0, which would only insert.
        $length = array_fill(0, $num, null);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer lengths are replaced by the number of input strings (kept from the original source)
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single input string only uses the first replacement
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into characters, so that the offsets count characters and not bytes
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      // replace everything up to the end of the string
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6177 1
6178 1
  /**
   * Strips a trailing $needle from $haystack (case sensitive).
   *
   * @param string $haystack <p>The string to remove the suffix from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the suffix, or the unchanged haystack
   *                if the needle is empty or isn't a suffix of it.
   *                </p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // there is nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the input untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6205
6206 10
  /**
   * Swaps the case of every non-whitespace character of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // a char that is unchanged by the upper-casing gets lower-cased, every other char gets upper-cased
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6249
6250 8
  /**
   * Deprecated alias, forwards all arguments unchanged to "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "$unknown" argument.</p>
   * @param bool   $strict    <p>Passed on as the "$strict" argument.</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    return self::to_ascii($s, $subst_chr, $strict);
  }
6267
6268
  /**
   * Deprecated alias, forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toIso8859($str)
  {
    return self::to_iso8859($str);
  }
6283
6284
  /**
   * Deprecated alias, forwards the argument unchanged to "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toLatin1($str)
  {
    return self::to_latin1($str);
  }
6299
6300
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_utf8()"
   * (html-entity decoding stays at its default, disabled).
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toUTF8($str)
  {
    return self::to_utf8($str);
  }
6315
6316
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; pure ASCII input is returned directly. Every other character
   * is looked up in the transliteration tables below "data/" (one file per 256 code points),
   * characters without a table entry are replaced by "$unknown".
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If "$strict" is true, but Intl isn't supported or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache for the transliteration tables, indexed by bank (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] !== true || !Bootup::is_php('5.4')) {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }

      $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

      // check again, if we only have ASCII, now ...
      if (self::is_ascii($str) === true) {
        return $str;
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // A continuation byte (128-191) or 254/255 can't start a character. This is decided
      // before any trailing byte is read, and "$ord" is assigned on every remaining path,
      // so that a code point of a previous character is never reused.
      if ($ordC0 < 192 || $ordC0 > 253) {
        $c = $unknown;
        continue;
      }

      // decode the code point, the lead byte defines the length of the sequence
      $ordC1 = ord($c[1]);
      if ($ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      } elseif ($ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } else {
        $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // load the table for this block of 256 code points on first use
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // presumably the data file assigns "$UTF8_TO_ASCII[$bank]" - TODO confirm against the data files
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // make sure the bank exists, even if there is no data file (or it did not fill the bank)
        if (!isset($UTF8_TO_ASCII[$bank]) || !is_array($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
6444
6445
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str)) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    return self::utf8_decode($str);
  }
6474
6475
  /**
   * Alias, forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    return self::to_iso8859($str);
  }
6488
6489
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decode UTF-8 codepoints and unicode escape sequences.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already
        // the following bytes, "\x00" is used as placeholder behind the end of the string
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

        // byte-length of the sequence, if the bytes at $i already look like UTF8 (0 = they don't)
        $sequenceLength = 0;
        if ($c1 <= "\xdf") { // looks like 2 bytes UTF8
          if ($c2 >= "\x80" && $c2 <= "\xbf") {
            $sequenceLength = 2;
          }
        } elseif ($c1 <= "\xef") { // looks like 3 bytes UTF8
          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") {
            $sequenceLength = 3;
          }
        } elseif ($c1 <= "\xf7") { // looks like 4 bytes UTF8
          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") {
            $sequenceLength = 4;
          }
        }

        if ($sequenceLength > 0) { // yeah, almost sure it's UTF8 already
          $buf .= substr($str, $i, $sequenceLength);
          $i += $sequenceLength - 1;
        } else { // not valid UTF8 - convert the single byte into a 2 bytes sequence
          // integer shift instead of "/ 64": no fractional float is passed to "chr()"
          $ordC1 = ord($c1);
          $buf .= chr(($ordC1 >> 6) | 0xc0) . chr(($ordC1 & 0x3f) | 0x80);
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= chr(($ordC1 >> 6) | 0xc0) . chr(($ordC1 & 0x3f) | 0x80);
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6615
6616
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>
   *                      Optional characters to be stripped. Every "falsy" value (e.g. '' or '0')
   *                      is handled as if no characters were given.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // custom characters are stripped from the left side first, then from the right side
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // default: strip the unicode categories "separator" (Z) and "other" (C) from both ends
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6644
6645
  /**
   * Makes the first character of a string uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // self::substr() can return false instead of a string (flagged by static analysis);
    // cast both parts so that self::strtoupper() always receives a string and the
    // concatenation never relies on implicit bool-to-string conversion.
    return self::strtoupper((string)$firstChar, $encoding, $cleanUtf8) . (string)$remainder;
  }
6658
6659
  /**
   * Alias for "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input word.</p>
   * @param string  $encoding  <p>Forwarded unchanged to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 <p>Forwarded unchanged to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6674
6675
  /**
   * Uppercases the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every chunk that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()" and the chunks are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // check for a really empty string: a loose "!$str" would also discard the input "0"
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty chunks only; a loose "!$word" would silently drop the chunk "0"
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6724
6725
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Repeat the decoding until the string no longer changes.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite non-standard "%uXXXX" escapes as hexadecimal HTML entities,
    // so that the entity decoding below takes care of them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    do {
      // remember the input of this round to detect when nothing changes anymore
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6775
6776
  /**
6777
   * Returns an array that maps url-encoded Windows-1252 sequences (e.g. "%FC") to their UTF-8 characters.
6778
   *
6779
   * @deprecated use the "UTF8::urldecode()" function to decode a string
6780
   *
6781
   * @return array
6782
   */
6783
  public static function urldecode_fix_win1252_chars()
6784
  {
6785
    static $array = array(
6786
        '%20' => ' ',
6787
        '%21' => '!',
6788
        '%22' => '"',
6789
        '%23' => '#',
6790
        '%24' => '$',
6791
        '%25' => '%',
6792
        '%26' => '&',
6793
        '%27' => "'",
6794
        '%28' => '(',
6795
        '%29' => ')',
6796
        '%2A' => '*',
6797
        '%2B' => '+',
6798
        '%2C' => ',',
6799
        '%2D' => '-',
6800
        '%2E' => '.',
6801
        '%2F' => '/',
6802
        '%30' => '0',
6803
        '%31' => '1',
6804
        '%32' => '2',
6805
        '%33' => '3',
6806
        '%34' => '4',
6807
        '%35' => '5',
6808
        '%36' => '6',
6809
        '%37' => '7',
6810
        '%38' => '8',
6811
        '%39' => '9',
6812
        '%3A' => ':',
6813
        '%3B' => ';',
6814
        '%3C' => '<',
6815
        '%3D' => '=',
6816
        '%3E' => '>',
6817
        '%3F' => '?',
6818
        '%40' => '@',
6819
        '%41' => 'A',
6820
        '%42' => 'B',
6821
        '%43' => 'C',
6822
        '%44' => 'D',
6823
        '%45' => 'E',
6824
        '%46' => 'F',
6825
        '%47' => 'G',
6826
        '%48' => 'H',
6827
        '%49' => 'I',
6828
        '%4A' => 'J',
6829
        '%4B' => 'K',
6830
        '%4C' => 'L',
6831
        '%4D' => 'M',
6832
        '%4E' => 'N',
6833
        '%4F' => 'O',
6834
        '%50' => 'P',
6835
        '%51' => 'Q',
6836
        '%52' => 'R',
6837
        '%53' => 'S',
6838
        '%54' => 'T',
6839
        '%55' => 'U',
6840
        '%56' => 'V',
6841
        '%57' => 'W',
6842
        '%58' => 'X',
6843
        '%59' => 'Y',
6844
        '%5A' => 'Z',
6845
        '%5B' => '[',
6846
        '%5C' => '\\',
6847
        '%5D' => ']',
6848
        '%5E' => '^',
6849
        '%5F' => '_',
6850
        '%60' => '`',
6851
        '%61' => 'a',
6852
        '%62' => 'b',
6853
        '%63' => 'c',
6854
        '%64' => 'd',
6855
        '%65' => 'e',
6856
        '%66' => 'f',
6857
        '%67' => 'g',
6858
        '%68' => 'h',
6859
        '%69' => 'i',
6860
        '%6A' => 'j',
6861
        '%6B' => 'k',
6862
        '%6C' => 'l',
6863
        '%6D' => 'm',
6864
        '%6E' => 'n',
6865
        '%6F' => 'o',
6866
        '%70' => 'p',
6867
        '%71' => 'q',
6868
        '%72' => 'r',
6869
        '%73' => 's',
6870
        '%74' => 't',
6871
        '%75' => 'u',
6872
        '%76' => 'v',
6873
        '%77' => 'w',
6874
        '%78' => 'x',
6875
        '%79' => 'y',
6876
        '%7A' => 'z',
6877
        '%7B' => '{',
6878
        '%7C' => '|',
6879
        '%7D' => '}',
6880
        '%7E' => '~',
6881
        '%7F' => '',
6882
        '%80' => '`',
6883
        '%81' => '',
6884
        '%82' => '‚',
6885
        '%83' => 'ƒ',
6886
        '%84' => '„',
6887
        '%85' => '…',
6888
        '%86' => '†',
6889
        '%87' => '‡',
6890
        '%88' => 'ˆ',
6891
        '%89' => '‰',
6892
        '%8A' => 'Š',
6893
        '%8B' => '‹',
6894
        '%8C' => 'Œ',
6895
        '%8D' => '',
6896
        '%8E' => 'Ž',
6897
        '%8F' => '',
6898
        '%90' => '',
6899
        '%91' => '‘',
6900
        '%92' => '’',
6901
        '%93' => '“',
6902
        '%94' => '”',
6903
        '%95' => '•',
6904
        '%96' => '–',
6905
        '%97' => '—',
6906
        '%98' => '˜',
6907
        '%99' => '™',
6908
        '%9A' => 'š',
6909
        '%9B' => '›',
6910
        '%9C' => 'œ',
6911
        '%9D' => '',
6912
        '%9E' => 'ž',
6913
        '%9F' => 'Ÿ',
6914
        '%A0' => '',
6915
        '%A1' => '¡',
6916
        '%A2' => '¢',
6917
        '%A3' => '£',
6918
        '%A4' => '¤',
6919
        '%A5' => '¥',
6920
        '%A6' => '¦',
6921
        '%A7' => '§',
6922
        '%A8' => '¨',
6923
        '%A9' => '©',
6924
        '%AA' => 'ª',
6925
        '%AB' => '«',
6926
        '%AC' => '¬',
6927
        '%AD' => '',
6928
        '%AE' => '®',
6929
        '%AF' => '¯',
6930
        '%B0' => '°',
6931
        '%B1' => '±',
6932
        '%B2' => '²',
6933
        '%B3' => '³',
6934
        '%B4' => '´',
6935
        '%B5' => 'µ',
6936
        '%B6' => '¶',
6937
        '%B7' => '·',
6938
        '%B8' => '¸',
6939
        '%B9' => '¹',
6940
        '%BA' => 'º',
6941
        '%BB' => '»',
6942
        '%BC' => '¼',
6943
        '%BD' => '½',
6944
        '%BE' => '¾',
6945
        '%BF' => '¿',
6946
        '%C0' => 'À',
6947
        '%C1' => 'Á',
6948
        '%C2' => 'Â',
6949
        '%C3' => 'Ã',
6950
        '%C4' => 'Ä',
6951
        '%C5' => 'Å',
6952
        '%C6' => 'Æ',
6953
        '%C7' => 'Ç',
6954
        '%C8' => 'È',
6955
        '%C9' => 'É',
6956
        '%CA' => 'Ê',
6957
        '%CB' => 'Ë',
6958
        '%CC' => 'Ì',
6959
        '%CD' => 'Í',
6960
        '%CE' => 'Î',
6961
        '%CF' => 'Ï',
6962
        '%D0' => 'Ð',
6963
        '%D1' => 'Ñ',
6964
        '%D2' => 'Ò',
6965
        '%D3' => 'Ó',
6966
        '%D4' => 'Ô',
6967
        '%D5' => 'Õ',
6968
        '%D6' => 'Ö',
6969
        '%D7' => '×',
6970
        '%D8' => 'Ø',
6971
        '%D9' => 'Ù',
6972
        '%DA' => 'Ú',
6973
        '%DB' => 'Û',
6974
        '%DC' => 'Ü',
6975
        '%DD' => 'Ý',
6976
        '%DE' => 'Þ',
6977
        '%DF' => 'ß',
6978
        '%E0' => 'à',
6979
        '%E1' => 'á',
6980
        '%E2' => 'â',
6981
        '%E3' => 'ã',
6982
        '%E4' => 'ä',
6983
        '%E5' => 'å',
6984
        '%E6' => 'æ',
6985
        '%E7' => 'ç',
6986
        '%E8' => 'è',
6987
        '%E9' => 'é',
6988
        '%EA' => 'ê',
6989
        '%EB' => 'ë',
6990
        '%EC' => 'ì',
6991
        '%ED' => 'í',
6992
        '%EE' => 'î',
6993
        '%EF' => 'ï',
6994
        '%F0' => 'ð',
6995
        '%F1' => 'ñ',
6996
        '%F2' => 'ò',
6997
        '%F3' => 'ó',
6998
        '%F4' => 'ô',
6999
        '%F5' => 'õ',
7000
        '%F6' => 'ö',
7001
        '%F7' => '÷',
7002
        '%F8' => 'ø',
7003
        '%F9' => 'ù',
7004
        '%FA' => 'ú',
7005
        '%FB' => 'û',
7006
        '%FC' => 'ü',
7007
        '%FD' => 'ý',
7008
        '%FE' => 'þ',
7009
        '%FF' => 'ÿ',
7010
    );
7011
7012
    return $array;
7013
  }
7014
7015
  /**
   * Decodes a UTF-8 string to ISO-8859-1.
   *
   * The input is normalized via "UTF8::to_utf8()", the sequences listed in the
   * "$utf8ToWin1252" table are replaced and the result is handed to the polyfill decoder.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // the search / replace lists are built once per request and then reused
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($searchCache === null) {
      $searchCache = array_keys(self::$utf8ToWin1252);
      $replaceCache = array_values(self::$utf8ToWin1252);
    }

    $utf8 = (string)self::to_utf8($str);
    $mapped = str_replace($searchCache, $replaceCache, $utf8);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
7044
7045
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, the sequences listed in the "$cp1252ToUtf8" table
   * are replaced by their mapped values.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // the search / replace lists are built once per request and then reused
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // shortcut: without a "\xC2" byte the table replacement is skipped
    // (presumably every key of the table starts with "\xC2" - TODO confirm)
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchCache === null) {
      $searchCache = array_keys(self::$cp1252ToUtf8);
      $replaceCache = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
7078
7079
  /**
   * Fixes utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7092
7093
  /**
   * Returns the table of all UTF-8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
7109
7110
  /**
   * Limits the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Suffix that is appended when the string was actually shortened.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // grab leading whitespace plus at most $words runs of non-whitespace characters
    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    // shorten only if something matched and the match does not already cover the whole string
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7145
7146
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a one-byte-per-character mask ("?" for a character,
   * " " for a space and "#" for an existing break), the mask is wrapped by the native
   * function and the resulting break positions are mapped back onto the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // step 1: collect the characters and build the mask alongside
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $segmentIndex => $segment) {

      // every segment after the first one was preceded by a break in the input
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // step 2: let the native function decide where the mask has to be broken
    $mask = wordwrap($mask, $width, '#', $cut);

    // step 3: copy the characters and emit a break at every "#" of the wrapped mask
    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {

      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // an original break or the space at the wrap position is replaced by the break
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7213
7214
  /**
   * Returns the array of Unicode White Space characters.
   *
   * @return array <p>An array with the numeric code point as key and the White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7223
7224
}
7225