Completed
Push — master ( 89fd46...4ec48f )
by Lars
03:54
created

UTF8::file_get_contents()   C

Complexity

Conditions 8
Paths 48

Size

Total Lines 43
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 8

Importance

Changes 0
Metric Value
dl 0
loc 43
ccs 21
cts 21
cp 1
rs 5.3846
c 0
b 0
f 0
cc 8
eloc 23
nc 48
nop 7
crap 8
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
8
use Symfony\Polyfill\Xml\Xml;
9
10
/**
11
 * UTF8-Helper-Class
12
 *
13
 * @package voku\helper
14
 */
15
final class UTF8
16
{
17
  /**
18
   * @var array
19
   */
20
  private static $WIN1252_TO_UTF8 = array(
21
      128 => "\xe2\x82\xac", // EURO SIGN
22
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
23
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
24
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
25
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
26
      134 => "\xe2\x80\xa0", // DAGGER
27
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
28
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
29
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
30
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
31
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
32
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
33
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
34
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
35
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
36
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
37
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
38
      149 => "\xe2\x80\xa2", // BULLET
39
      150 => "\xe2\x80\x93", // EN DASH
40
      151 => "\xe2\x80\x94", // EM DASH
41
      152 => "\xcb\x9c", // SMALL TILDE
42
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
43
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
44
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
45
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
46
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
47
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
48
  );
49
50
  /**
51
   * @var array
52
   */
53
  private static $CP1252_TO_UTF8 = array(
54
      '€' => '€',
55
      '‚' => '‚',
56
      'ƒ' => 'ƒ',
57
      '„' => '„',
58
      '…' => '…',
59
      '†' => '†',
60
      '‡' => '‡',
61
      'ˆ' => 'ˆ',
62
      '‰' => '‰',
63
      'Š' => 'Š',
64
      '‹' => '‹',
65
      'Œ' => 'Œ',
66
      'Ž' => 'Ž',
67
      '‘' => '‘',
68
      '’' => '’',
69
      '“' => '“',
70
      '”' => '”',
71
      '•' => '•',
72
      '–' => '–',
73
      '—' => '—',
74
      '˜' => '˜',
75
      '™' => '™',
76
      'š' => 'š',
77
      '›' => '›',
78
      'œ' => 'œ',
79
      'ž' => 'ž',
80
      'Ÿ' => 'Ÿ',
81
  );
82
83
  /**
84
   * Bom => Byte-Length
85
   *
86
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
87
   *
88
   * @var array
89
   */
90
  private static $BOM = array(
91
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
92
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
93
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
94
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
96
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
98
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
100
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
101
  );
102
103
  /**
104
   * Numeric code point => UTF-8 Character
105
   *
106
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
107
   *
108
   * @var array
109
   */
110
  private static $WHITESPACE = array(
111
    // NUL Byte
112
    0     => "\x0",
113
    // Tab
114
    9     => "\x9",
115
    // New Line
116
    10    => "\xa",
117
    // Vertical Tab
118
    11    => "\xb",
119
    // Carriage Return
120
    13    => "\xd",
121
    // Ordinary Space
122
    32    => "\x20",
123
    // NO-BREAK SPACE
124
    160   => "\xc2\xa0",
125
    // OGHAM SPACE MARK
126
    5760  => "\xe1\x9a\x80",
127
    // MONGOLIAN VOWEL SEPARATOR
128
    6158  => "\xe1\xa0\x8e",
129
    // EN QUAD
130
    8192  => "\xe2\x80\x80",
131
    // EM QUAD
132
    8193  => "\xe2\x80\x81",
133
    // EN SPACE
134
    8194  => "\xe2\x80\x82",
135
    // EM SPACE
136
    8195  => "\xe2\x80\x83",
137
    // THREE-PER-EM SPACE
138
    8196  => "\xe2\x80\x84",
139
    // FOUR-PER-EM SPACE
140
    8197  => "\xe2\x80\x85",
141
    // SIX-PER-EM SPACE
142
    8198  => "\xe2\x80\x86",
143
    // FIGURE SPACE
144
    8199  => "\xe2\x80\x87",
145
    // PUNCTUATION SPACE
146
    8200  => "\xe2\x80\x88",
147
    // THIN SPACE
148
    8201  => "\xe2\x80\x89",
149
    //HAIR SPACE
150
    8202  => "\xe2\x80\x8a",
151
    // LINE SEPARATOR
152
    8232  => "\xe2\x80\xa8",
153
    // PARAGRAPH SEPARATOR
154
    8233  => "\xe2\x80\xa9",
155
    // NARROW NO-BREAK SPACE
156
    8239  => "\xe2\x80\xaf",
157
    // MEDIUM MATHEMATICAL SPACE
158
    8287  => "\xe2\x81\x9f",
159
    // IDEOGRAPHIC SPACE
160
    12288 => "\xe3\x80\x80",
161
  );
162
163
  /**
164
   * @var array
165
   */
166
  private static $WHITESPACE_TABLE = array(
167
      'SPACE'                     => "\x20",
168
      'NO-BREAK SPACE'            => "\xc2\xa0",
169
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
170
      'EN QUAD'                   => "\xe2\x80\x80",
171
      'EM QUAD'                   => "\xe2\x80\x81",
172
      'EN SPACE'                  => "\xe2\x80\x82",
173
      'EM SPACE'                  => "\xe2\x80\x83",
174
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
175
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
176
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
177
      'FIGURE SPACE'              => "\xe2\x80\x87",
178
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
179
      'THIN SPACE'                => "\xe2\x80\x89",
180
      'HAIR SPACE'                => "\xe2\x80\x8a",
181
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
182
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
183
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
184
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
185
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
186
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
187
  );
188
189
  /**
190
   * bidirectional text chars
191
   *
192
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
193
   *
194
   * @var array
195
   */
196
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
197
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
198
    8234 => "\xE2\x80\xAA",
199
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
200
    8235 => "\xE2\x80\xAB",
201
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
202
    8236 => "\xE2\x80\xAC",
203
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
204
    8237 => "\xE2\x80\xAD",
205
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
206
    8238 => "\xE2\x80\xAE",
207
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
208
    8294 => "\xE2\x81\xA6",
209
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
210
    8295 => "\xE2\x81\xA7",
211
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
212
    8296 => "\xE2\x81\xA8",
213
    // POP DIRECTIONAL ISOLATE
214
    8297 => "\xE2\x81\xA9",
215
  );
216
217
  /**
218
   * @var array
219
   */
220
  private static $COMMON_CASE_FOLD = array(
221
      'ſ'            => 's',
222
      "\xCD\x85"     => 'ι',
223
      'ς'            => 'σ',
224
      "\xCF\x90"     => 'β',
225
      "\xCF\x91"     => 'θ',
226
      "\xCF\x95"     => 'φ',
227
      "\xCF\x96"     => 'π',
228
      "\xCF\xB0"     => 'κ',
229
      "\xCF\xB1"     => 'ρ',
230
      "\xCF\xB5"     => 'ε',
231
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
232
      "\xE1\xBE\xBE" => 'ι',
233
  );
234
235
  /**
236
   * @var array
237
   */
238
  private static $BROKEN_UTF8_FIX = array(
239
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
240
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
241
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
242
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
243
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
244
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
245
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
246
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
247
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
248
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
249
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
250
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
251
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
252
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
253
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
254
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
255
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
256
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
257
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
258
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
259
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
260
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
261
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
262
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
263
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
264
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
265
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
266
      'ü'       => 'ü',
267
      'ä'       => 'ä',
268
      'ö'       => 'ö',
269
      'Ö'       => 'Ö',
270
      'ß'       => 'ß',
271
      'Ã '       => 'à',
272
      'á'       => 'á',
273
      'â'       => 'â',
274
      'ã'       => 'ã',
275
      'ù'       => 'ù',
276
      'ú'       => 'ú',
277
      'û'       => 'û',
278
      'Ù'       => 'Ù',
279
      'Ú'       => 'Ú',
280
      'Û'       => 'Û',
281
      'Ü'       => 'Ü',
282
      'ò'       => 'ò',
283
      'ó'       => 'ó',
284
      'ô'       => 'ô',
285
      'è'       => 'è',
286
      'é'       => 'é',
287
      'ê'       => 'ê',
288
      'ë'       => 'ë',
289
      'À'       => 'À',
290
      'Á'       => 'Á',
291
      'Â'       => 'Â',
292
      'Ã'       => 'Ã',
293
      'Ä'       => 'Ä',
294
      'Ã…'       => 'Å',
295
      'Ç'       => 'Ç',
296
      'È'       => 'È',
297
      'É'       => 'É',
298
      'Ê'       => 'Ê',
299
      'Ë'       => 'Ë',
300
      'ÃŒ'       => 'Ì',
301
      'Í'       => 'Í',
302
      'ÃŽ'       => 'Î',
303
      'Ï'       => 'Ï',
304
      'Ñ'       => 'Ñ',
305
      'Ã’'       => 'Ò',
306
      'Ó'       => 'Ó',
307
      'Ô'       => 'Ô',
308
      'Õ'       => 'Õ',
309
      'Ø'       => 'Ø',
310
      'Ã¥'       => 'å',
311
      'æ'       => 'æ',
312
      'ç'       => 'ç',
313
      'ì'       => 'ì',
314
      'í'       => 'í',
315
      'î'       => 'î',
316
      'ï'       => 'ï',
317
      'ð'       => 'ð',
318
      'ñ'       => 'ñ',
319
      'õ'       => 'õ',
320
      'ø'       => 'ø',
321
      'ý'       => 'ý',
322
      'ÿ'       => 'ÿ',
323
      '€'      => '€',
324
      '’'      => '’',
325
  );
326
327
  /**
328
   * @var array
329
   */
330
  private static $UTF8_TO_WIN1252 = array(
331
      "\xe2\x82\xac" => "\x80", // EURO SIGN
332
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
333
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
334
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
335
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
336
      "\xe2\x80\xa0" => "\x86", // DAGGER
337
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
338
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
339
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
340
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
341
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
342
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
343
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
344
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
345
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
346
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
347
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
348
      "\xe2\x80\xa2" => "\x95", // BULLET
349
      "\xe2\x80\x93" => "\x96", // EN DASH
350
      "\xe2\x80\x94" => "\x97", // EM DASH
351
      "\xcb\x9c"     => "\x98", // SMALL TILDE
352
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
353
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
354
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
355
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
356
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
357
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
358
  );
359
360
  /**
361
   * @var array
362
   */
363
  private static $UTF8_MSWORD = array(
364
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
365
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
366
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
367
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
368
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
369
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
370
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
371
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
372
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
373
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
374
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
375
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
376
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
377
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
378
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
379
  );
380
381
  /**
382
   * @var array
383
   */
384
  private static $ICONV_ENCODING = array(
385
      'ANSI_X3.4-1968',
386
      'ANSI_X3.4-1986',
387
      'ASCII',
388
      'CP367',
389
      'IBM367',
390
      'ISO-IR-6',
391
      'ISO646-US',
392
      'ISO_646.IRV:1991',
393
      'US',
394
      'US-ASCII',
395
      'CSASCII',
396
      'UTF-8',
397
      'ISO-10646-UCS-2',
398
      'UCS-2',
399
      'CSUNICODE',
400
      'UCS-2BE',
401
      'UNICODE-1-1',
402
      'UNICODEBIG',
403
      'CSUNICODE11',
404
      'UCS-2LE',
405
      'UNICODELITTLE',
406
      'ISO-10646-UCS-4',
407
      'UCS-4',
408
      'CSUCS4',
409
      'UCS-4BE',
410
      'UCS-4LE',
411
      'UTF-16',
412
      'UTF-16BE',
413
      'UTF-16LE',
414
      'UTF-32',
415
      'UTF-32BE',
416
      'UTF-32LE',
417
      'UNICODE-1-1-UTF-7',
418
      'UTF-7',
419
      'CSUNICODE11UTF7',
420
      'UCS-2-INTERNAL',
421
      'UCS-2-SWAPPED',
422
      'UCS-4-INTERNAL',
423
      'UCS-4-SWAPPED',
424
      'C99',
425
      'JAVA',
426
      'CP819',
427
      'IBM819',
428
      'ISO-8859-1',
429
      'ISO-IR-100',
430
      'ISO8859-1',
431
      'ISO_8859-1',
432
      'ISO_8859-1:1987',
433
      'L1',
434
      'LATIN1',
435
      'CSISOLATIN1',
436
      'ISO-8859-2',
437
      'ISO-IR-101',
438
      'ISO8859-2',
439
      'ISO_8859-2',
440
      'ISO_8859-2:1987',
441
      'L2',
442
      'LATIN2',
443
      'CSISOLATIN2',
444
      'ISO-8859-3',
445
      'ISO-IR-109',
446
      'ISO8859-3',
447
      'ISO_8859-3',
448
      'ISO_8859-3:1988',
449
      'L3',
450
      'LATIN3',
451
      'CSISOLATIN3',
452
      'ISO-8859-4',
453
      'ISO-IR-110',
454
      'ISO8859-4',
455
      'ISO_8859-4',
456
      'ISO_8859-4:1988',
457
      'L4',
458
      'LATIN4',
459
      'CSISOLATIN4',
460
      'CYRILLIC',
461
      'ISO-8859-5',
462
      'ISO-IR-144',
463
      'ISO8859-5',
464
      'ISO_8859-5',
465
      'ISO_8859-5:1988',
466
      'CSISOLATINCYRILLIC',
467
      'ARABIC',
468
      'ASMO-708',
469
      'ECMA-114',
470
      'ISO-8859-6',
471
      'ISO-IR-127',
472
      'ISO8859-6',
473
      'ISO_8859-6',
474
      'ISO_8859-6:1987',
475
      'CSISOLATINARABIC',
476
      'ECMA-118',
477
      'ELOT_928',
478
      'GREEK',
479
      'GREEK8',
480
      'ISO-8859-7',
481
      'ISO-IR-126',
482
      'ISO8859-7',
483
      'ISO_8859-7',
484
      'ISO_8859-7:1987',
485
      'ISO_8859-7:2003',
486
      'CSISOLATINGREEK',
487
      'HEBREW',
488
      'ISO-8859-8',
489
      'ISO-IR-138',
490
      'ISO8859-8',
491
      'ISO_8859-8',
492
      'ISO_8859-8:1988',
493
      'CSISOLATINHEBREW',
494
      'ISO-8859-9',
495
      'ISO-IR-148',
496
      'ISO8859-9',
497
      'ISO_8859-9',
498
      'ISO_8859-9:1989',
499
      'L5',
500
      'LATIN5',
501
      'CSISOLATIN5',
502
      'ISO-8859-10',
503
      'ISO-IR-157',
504
      'ISO8859-10',
505
      'ISO_8859-10',
506
      'ISO_8859-10:1992',
507
      'L6',
508
      'LATIN6',
509
      'CSISOLATIN6',
510
      'ISO-8859-11',
511
      'ISO8859-11',
512
      'ISO_8859-11',
513
      'ISO-8859-13',
514
      'ISO-IR-179',
515
      'ISO8859-13',
516
      'ISO_8859-13',
517
      'L7',
518
      'LATIN7',
519
      'ISO-8859-14',
520
      'ISO-CELTIC',
521
      'ISO-IR-199',
522
      'ISO8859-14',
523
      'ISO_8859-14',
524
      'ISO_8859-14:1998',
525
      'L8',
526
      'LATIN8',
527
      'ISO-8859-15',
528
      'ISO-IR-203',
529
      'ISO8859-15',
530
      'ISO_8859-15',
531
      'ISO_8859-15:1998',
532
      'LATIN-9',
533
      'ISO-8859-16',
534
      'ISO-IR-226',
535
      'ISO8859-16',
536
      'ISO_8859-16',
537
      'ISO_8859-16:2001',
538
      'L10',
539
      'LATIN10',
540
      'KOI8-R',
541
      'CSKOI8R',
542
      'KOI8-U',
543
      'KOI8-RU',
544
      'CP1250',
545
      'MS-EE',
546
      'WINDOWS-1250',
547
      'CP1251',
548
      'MS-CYRL',
549
      'WINDOWS-1251',
550
      'CP1252',
551
      'MS-ANSI',
552
      'WINDOWS-1252',
553
      'CP1253',
554
      'MS-GREEK',
555
      'WINDOWS-1253',
556
      'CP1254',
557
      'MS-TURK',
558
      'WINDOWS-1254',
559
      'CP1255',
560
      'MS-HEBR',
561
      'WINDOWS-1255',
562
      'CP1256',
563
      'MS-ARAB',
564
      'WINDOWS-1256',
565
      'CP1257',
566
      'WINBALTRIM',
567
      'WINDOWS-1257',
568
      'CP1258',
569
      'WINDOWS-1258',
570
      '850',
571
      'CP850',
572
      'IBM850',
573
      'CSPC850MULTILINGUAL',
574
      '862',
575
      'CP862',
576
      'IBM862',
577
      'CSPC862LATINHEBREW',
578
      '866',
579
      'CP866',
580
      'IBM866',
581
      'CSIBM866',
582
      'MAC',
583
      'MACINTOSH',
584
      'MACROMAN',
585
      'CSMACINTOSH',
586
      'MACCENTRALEUROPE',
587
      'MACICELAND',
588
      'MACCROATIAN',
589
      'MACROMANIA',
590
      'MACCYRILLIC',
591
      'MACUKRAINE',
592
      'MACGREEK',
593
      'MACTURKISH',
594
      'MACHEBREW',
595
      'MACARABIC',
596
      'MACTHAI',
597
      'HP-ROMAN8',
598
      'R8',
599
      'ROMAN8',
600
      'CSHPROMAN8',
601
      'NEXTSTEP',
602
      'ARMSCII-8',
603
      'GEORGIAN-ACADEMY',
604
      'GEORGIAN-PS',
605
      'KOI8-T',
606
      'CP154',
607
      'CYRILLIC-ASIAN',
608
      'PT154',
609
      'PTCP154',
610
      'CSPTCP154',
611
      'KZ-1048',
612
      'RK1048',
613
      'STRK1048-2002',
614
      'CSKZ1048',
615
      'MULELAO-1',
616
      'CP1133',
617
      'IBM-CP1133',
618
      'ISO-IR-166',
619
      'TIS-620',
620
      'TIS620',
621
      'TIS620-0',
622
      'TIS620.2529-1',
623
      'TIS620.2533-0',
624
      'TIS620.2533-1',
625
      'CP874',
626
      'WINDOWS-874',
627
      'VISCII',
628
      'VISCII1.1-1',
629
      'CSVISCII',
630
      'TCVN',
631
      'TCVN-5712',
632
      'TCVN5712-1',
633
      'TCVN5712-1:1993',
634
      'ISO-IR-14',
635
      'ISO646-JP',
636
      'JIS_C6220-1969-RO',
637
      'JP',
638
      'CSISO14JISC6220RO',
639
      'JISX0201-1976',
640
      'JIS_X0201',
641
      'X0201',
642
      'CSHALFWIDTHKATAKANA',
643
      'ISO-IR-87',
644
      'JIS0208',
645
      'JIS_C6226-1983',
646
      'JIS_X0208',
647
      'JIS_X0208-1983',
648
      'JIS_X0208-1990',
649
      'X0208',
650
      'CSISO87JISX0208',
651
      'ISO-IR-159',
652
      'JIS_X0212',
653
      'JIS_X0212-1990',
654
      'JIS_X0212.1990-0',
655
      'X0212',
656
      'CSISO159JISX02121990',
657
      'CN',
658
      'GB_1988-80',
659
      'ISO-IR-57',
660
      'ISO646-CN',
661
      'CSISO57GB1988',
662
      'CHINESE',
663
      'GB_2312-80',
664
      'ISO-IR-58',
665
      'CSISO58GB231280',
666
      'CN-GB-ISOIR165',
667
      'ISO-IR-165',
668
      'ISO-IR-149',
669
      'KOREAN',
670
      'KSC_5601',
671
      'KS_C_5601-1987',
672
      'KS_C_5601-1989',
673
      'CSKSC56011987',
674
      'EUC-JP',
675
      'EUCJP',
676
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
677
      'CSEUCPKDFMTJAPANESE',
678
      'MS_KANJI',
679
      'SHIFT-JIS',
680
      'SHIFT_JIS',
681
      'SJIS',
682
      'CSSHIFTJIS',
683
      'CP932',
684
      'ISO-2022-JP',
685
      'CSISO2022JP',
686
      'ISO-2022-JP-1',
687
      'ISO-2022-JP-2',
688
      'CSISO2022JP2',
689
      'CN-GB',
690
      'EUC-CN',
691
      'EUCCN',
692
      'GB2312',
693
      'CSGB2312',
694
      'GBK',
695
      'CP936',
696
      'MS936',
697
      'WINDOWS-936',
698
      'GB18030',
699
      'ISO-2022-CN',
700
      'CSISO2022CN',
701
      'ISO-2022-CN-EXT',
702
      'HZ',
703
      'HZ-GB-2312',
704
      'EUC-TW',
705
      'EUCTW',
706
      'CSEUCTW',
707
      'BIG-5',
708
      'BIG-FIVE',
709
      'BIG5',
710
      'BIGFIVE',
711
      'CN-BIG5',
712
      'CSBIG5',
713
      'CP950',
714
      'BIG5-HKSCS:1999',
715
      'BIG5-HKSCS:2001',
716
      'BIG5-HKSCS',
717
      'BIG5-HKSCS:2004',
718
      'BIG5HKSCS',
719
      'EUC-KR',
720
      'EUCKR',
721
      'CSEUCKR',
722
      'CP949',
723
      'UHC',
724
      'CP1361',
725
      'JOHAB',
726
      'ISO-2022-KR',
727
      'CSISO2022KR',
728
      'CP856',
729
      'CP922',
730
      'CP943',
731
      'CP1046',
732
      'CP1124',
733
      'CP1129',
734
      'CP1161',
735
      'IBM-1161',
736
      'IBM1161',
737
      'CSIBM1161',
738
      'CP1162',
739
      'IBM-1162',
740
      'IBM1162',
741
      'CSIBM1162',
742
      'CP1163',
743
      'IBM-1163',
744
      'IBM1163',
745
      'CSIBM1163',
746
      'DEC-KANJI',
747
      'DEC-HANYU',
748
      '437',
749
      'CP437',
750
      'IBM437',
751
      'CSPC8CODEPAGE437',
752
      'CP737',
753
      'CP775',
754
      'IBM775',
755
      'CSPC775BALTIC',
756
      '852',
757
      'CP852',
758
      'IBM852',
759
      'CSPCP852',
760
      'CP853',
761
      '855',
762
      'CP855',
763
      'IBM855',
764
      'CSIBM855',
765
      '857',
766
      'CP857',
767
      'IBM857',
768
      'CSIBM857',
769
      'CP858',
770
      '860',
771
      'CP860',
772
      'IBM860',
773
      'CSIBM860',
774
      '861',
775
      'CP-IS',
776
      'CP861',
777
      'IBM861',
778
      'CSIBM861',
779
      '863',
780
      'CP863',
781
      'IBM863',
782
      'CSIBM863',
783
      'CP864',
784
      'IBM864',
785
      'CSIBM864',
786
      '865',
787
      'CP865',
788
      'IBM865',
789
      'CSIBM865',
790
      '869',
791
      'CP-GR',
792
      'CP869',
793
      'IBM869',
794
      'CSIBM869',
795
      'CP1125',
796
      'EUC-JISX0213',
797
      'SHIFT_JISX0213',
798
      'ISO-2022-JP-3',
799
      'BIG5-2003',
800
      'ISO-IR-230',
801
      'TDS565',
802
      'ATARI',
803
      'ATARIST',
804
      'RISCOS-LATIN1',
805
  );
806
807 1
  /**
808
   * @var array
809 1
   */
810 1
  private static $SUPPORT = array();
811
812
  /**
813
   * __construct()
814
   */
815
  public function __construct()
  {
    // Instantiating the helper triggers the one-time environment detection.
    self::checkForSupport();
  }
819
820 2
  /**
821
   * Return the character at the specified position: $str[1] like functionality.
822 2
   *
823
   * @param string $str <p>A UTF-8 string.</p>
824
   * @param int    $pos <p>The position of character to return.</p>
825
   *
826
   * @return string <p>Single Multi-Byte character.</p>
827
   */
828
  public static function access($str, $pos)
  {
    $haystack = (string)$str;
    $index = (int)$pos;

    // An empty string or a negative offset can never address a character.
    if ($haystack === '' || $index < 0) {
      return '';
    }

    // Exactly one character, starting at the requested offset.
    return self::substr($haystack, $index, 1);
  }
843
844
  /**
845
   * Prepends UTF-8 BOM character to the string and returns the whole string.
846
   *
847
   * INFO: If BOM already existed there, the Input string is returned.
848
   *
849
   * @param string $str <p>The input string.</p>
850 1
   *
851
   * @return string <p>The output string that contains BOM.</p>
852 1
   */
853
  public static function add_bom_to_string($str)
  {
    // Anything other than an explicit "no BOM found" leaves the input untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
861
862 2
  /**
863
   * Convert binary into a string.
864
   *
865
   * @param mixed $bin 1|0
866
   *
867
   * @return string
868
   */
869
  public static function binary_to_str($bin)
  {
    // Only a non-empty string can carry binary digits; every other input
    // (null, int, empty string, ...) yields an empty result.
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // Same leniency as base_convert(): characters other than "0" / "1" are
    // ignored, and leading zeros carry no value.
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      // A value of zero is still emitted as a single NUL byte.
      return "\x00";
    }

    // Left-pad to whole bytes. pack('H*', ...) pads an odd number of hex
    // digits on the RIGHT, which shifted the value by one nibble
    // (e.g. "1" became "\x10" instead of "\x01").
    $missing = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    // Convert byte by byte instead of going through base_convert(), which
    // works on floats and silently loses precision on long inputs.
    $str = '';
    foreach (\str_split($bits, 8) as $byte) {
      $str .= \chr((int)\bindec($byte));
    }

    return $str;
  }
877
878
  /**
879
   * Returns the UTF-8 Byte Order Mark Character.
880
   *
881
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
882
   *
883
   * @return string UTF-8 Byte Order Mark
884 2
   */
885
  public static function bom()
  {
    // U+FEFF encoded as UTF-8, i.e. the three bytes EF BB BF.
    return "\u{FEFF}";
  }
889
890 1
  /**
891 1
   * @alias of UTF8::chr_map()
892 1
   *
893 1
   * @see   UTF8::chr_map()
894 1
   *
895 1
   * @param string|array $callback
896 2
   * @param string       $str
897
   *
898
   * @return array
899
   */
900
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are handed to chr_map() untouched.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
904
905
  /**
906
   * This method will auto-detect your server environment for UTF-8 support.
907 9
   *
908
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
909 9
   */
910 9
  public static function checkForSupport()
  {
    // The detection has already run once: nothing left to do.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    // The marker is stored first, before any of the probes below is called.
    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // PCRE with UTF-8: http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
932
933 7
  /**
934 6
   * Generates a UTF-8 encoded character from the given code point.
935 6
   *
936
   * INFO: opposite to UTF8::ord()
937
   *
938 7
   * @param int    $code_point <p>The code point for which to generate a character.</p>
939 7
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
940 7
   *
941 7
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
942
   */
943
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // Make sure the feature flags in self::$SUPPORT are populated.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if ($encoding === 'UTF-8') {
      // Plain UTF-8 output: delegate to \IntlChar whenever it is available.
      if (self::$SUPPORT['intlChar'] === true) {
        return \IntlChar::chr($code_point);
      }
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // use static cache, only if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    // Wrap the code point into the 21-bit range, then work out the byte
    // values of its 1- to 4-byte UTF-8 sequence.
    $code_point %= 0x200000;
    if ($code_point < 0x80) {
      $bytes = array($code_point);
    } elseif ($code_point < 0x800) {
      $bytes = array(
          0xC0 | ($code_point >> 6),
          0x80 | ($code_point & 0x3F),
      );
    } elseif ($code_point < 0x10000) {
      $bytes = array(
          0xE0 | ($code_point >> 12),
          0x80 | (($code_point >> 6) & 0x3F),
          0x80 | ($code_point & 0x3F),
      );
    } else {
      $bytes = array(
          0xF0 | ($code_point >> 18),
          0x80 | (($code_point >> 12) & 0x3F),
          0x80 | (($code_point >> 6) & 0x3F),
          0x80 | ($code_point & 0x3F),
      );
    }

    // Turn the byte values into the actual byte string, lead byte first.
    $str = '';
    foreach ($bytes as $byte) {
      $str .= UTF8NonStrict::chr($byte);
    }

    // The sequence above is UTF-8; convert if another encoding was requested.
    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
987
988 4
  /**
   * Runs a callback over every character of a string.
   *
   * The string is broken into characters via UTF8::split() and the callback
   * is invoked once per character.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1002 2
1003
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The byte length of each character; an empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return array();
    }

    // split into characters first, then measure each one in bytes
    $chars = self::split($input);

    return array_map('strlen', $chars);
  }
1025 2
1026 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes belong to the sequence (1 to 4);
   * the payload bits of the lead byte and of each following byte are then
   * combined into one integer. Only the first sequence of the input is read.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value; 0 for an empty input.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // an empty string has no first byte: return early instead of reading an undefined string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx - a byte missing from a truncated sequence contributes zero bits
      // instead of triggering an "Uninitialized string offset" error
      $byte = isset($char[$i]) ? self::ord($char[$i]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1065
1066
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed to UTF8::int_to_hex(), "U+" by default.</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $input = (string)$char;

    if ($input === '') {
      return '';
    }

    // the literal entity "&#0;" is handled as if it were an empty character
    $codePoint = self::ord($input === '&#0;' ? '' : $input);

    return self::int_to_hex($codePoint, $pfix);
  }
1088
1089 44
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the same input.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1102 5
1103
  /**
   * Cuts a string into chunks and glues them back together with a separator.
   *
   * The chunks come from UTF8::split(); the separator is placed between the
   * chunks (it is not appended after the last one).
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1116 4
1117
  /**
   * Strips every byte that is not part of a UTF-8 shaped sequence from a string, plus optional extras.
   *
   * Processing order: byte filter, removal of the replacement character,
   * removal of invisible characters, then the optional whitespace / MS Word /
   * BOM steps, in that order.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences; stray bytes land in group 2 or 3
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // keeping only "$1" drops whatever matched the two "invalid byte" groups
    $str = preg_replace($pattern, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1165 1
1166
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors, then runs UTF8::clean() on it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for an empty input.</p>
   */
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // step 1: repair ISO <-> UTF-8 mix-ups
    $fixed = self::fix_simple_utf8($input);

    // step 2: UTF8::clean() with
    //   - BOM removal switched on
    //   - whitespace normalization switched on
    //   - MS Word normalization switched off
    //   - non-breaking spaces kept
    return (string)self::clean($fixed, true, true, false, true);
  }
1193
1194 1
  /**
   * Turns a string, or an array of strings, into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    // a plain string is split into characters first; an array is used as given
    $chars = is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    // second pass: format every integer via UTF8::int_to_hex()
    return array_map(array($class, 'int_to_hex'), $codePoints);
  }
1231
1232
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array entry per character
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1245
1246 11
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") which is then
   * decoded via UTF8::html_entity_decode().
   *
   * @param int $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only added when the PHP version check succeeds
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1263 1
1264
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function on a UTF-8 string as well without messing it up.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the input string when nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do without a string or without a target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $sourceEncoding = self::str_detect_encoding($str);
    $forced = ($force === true);

    // bail out when detection failed, or when the string already has the
    // requested encoding and no conversion was forced
    if (!$sourceEncoding || ($forced === false && $sourceEncoding === $encoding)) {
      return $str;
    }

    // dedicated converter for UTF-8 targets
    if (
        $encoding === 'UTF-8'
        &&
        (
            $forced
            ||
            in_array($sourceEncoding, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // dedicated converter for ISO-8859-1 targets
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $forced
            ||
            in_array($sourceEncoding, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // every other target goes through mb_convert_encoding(); warn when the
    // "mbstring" support flag is off
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $sourceEncoding);

    // an empty / falsy conversion result falls back to the input string
    return $converted ? $converted : $str;
  }
1354
1355
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The value is passed through
   *                                     filter_var() with FILTER_SANITIZE_STRING before it is used.
   *                                     </p>
   * @param int|bool|null $flags         [optional] <p>
   *                                     Only the truthiness of this value is used: it is forwarded as the
   *                                     boolean "use_include_path" argument of the native function, so
   *                                     FILE_USE_INCLUDE_PATH (or true) enables the include-path search.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with stream_context_create().
   *                                     If it is null and $timeout is non-zero, a context with the
   *                                     "http" => "timeout" option is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts, null means 0.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. Anything that is not an integer
   *                                     means: read until end of file is reached.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() signals failure with false; the native call below needs a string
    // (this file runs with strict_types), so stop here with the documented failure value
    if ($filename === false) {
      return false;
    }

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The second parameter of the native function is a bool. With strict_types an
    // integer flag such as FILE_USE_INCLUDE_PATH would raise a TypeError, hence the cast.
    $useIncludePath = (bool)$flags;

    if ($offset === null) {
      $offset = 0;
    }

    if (is_int($maxlen) === true) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // convert without forcing (the source encoding is auto-detected), then clean up
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1480
1481
  /**
   * Tells whether the content of a file starts with a BOM (Byte Order Mark).
   *
   * The file is read with the native file_get_contents() and the content is
   * handed to UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1492
1493
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are walked recursively (every element / every iterated
   * property is filtered), strings are normalized, every other type is
   * returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form handed to \Normalizer, 4 (NFC) by default.</p>
   * @param string $leading_combining  <p>Prefix that is put in front of a string starting with a combining mark.</p>
   *
   * @return mixed <p>The filtered value, same kind of value as the input.</p>
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // the normalization below only runs when the ASCII check answers with false
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker so that the isset() check further down passes
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // no usable normalizer result: re-encode the string as UTF-8 instead
        $var = self::encode('UTF-8', $var);
      }
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1556
1557
  /**
   * "filter_input()"-wrapper that passes the result through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when the caller actually passed a fourth argument.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // leave $options out of the native call unless the caller supplied it
    if (func_num_args() < 4) {
      $value = filter_input($type, $variable_name, $filter);
    } else {
      $value = filter_input($type, $variable_name, $filter, $options);
    }

    return self::filter($value);
  }
1597
1598
  /**
   * "filter_input_array()"-wrapper that passes the result through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets external variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are "filter" which specifies the filter type,
   *                          "flags" which specifies any flags that apply to the
   *                          filter, and "options" which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails.
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with only $type given, the native function is called with its own defaults
    if (func_num_args() < 2) {
      $values = filter_input_array($type);
    } else {
      $values = filter_input_array($type, $definition, $add_empty);
    }

    return self::filter($values);
  }
1645
1646
  /**
1647
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1648
   *
1649
   * Filters a variable with a specified filter
1650
   *
1651
   * @link  http://php.net/manual/en/function.filter-var.php
1652
   *
1653
   * @param mixed $variable <p>
1654 1
   *                        Value to filter.
1655
   *                        </p>
1656 1
   * @param int   $filter   [optional] <p>
1657 1
   *                        The ID of the filter to apply. The
1658
   *                        manual page lists the available filters.
1659
   *                        </p>
1660 1
   * @param mixed $options  [optional] <p>
1661
   *                        Associative array of options or bitwise disjunction of flags. If filter
1662 1
   *                        accepts options, flags can be provided in "flags" field of array. For
1663 1
   *                        the "callback" filter, callable type should be passed. The
1664 1
   *                        callback must accept one argument, the value to be filtered, and return
1665 1
   *                        the value after filtering/sanitizing it.
1666 1
   *                        </p>
1667 1
   *                        <p>
1668 1
   *                        <code>
1669 1
   *                        // for filters that accept options, use this format
1670 1
   *                        $options = array(
1671 1
   *                        'options' => array(
1672 1
   *                        'default' => 3, // value to return if the filter fails
1673
   *                        // other options here
1674
   *                        'min_range' => 0
1675
   *                        ),
1676
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1677
   *                        );
1678
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1679
   *                        // for filter that only accept flags, you can pass them directly
1680
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1681
   *                        // for filter that only accept flags, you can also pass as an array
1682
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1683
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1684
   *                        // callback validate filter
1685
   *                        function foo($value)
1686
   *                        {
1687
   *                        // Expected format: Surname, GivenNames
1688
   *                        if (strpos($value, ", ") === false) return false;
1689
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1690
   *                        $empty = (empty($surname) || empty($givennames));
1691
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1692 1
   *                        if ($empty || $notstrings) {
1693 1
   *                        return false;
1694
   *                        } else {
1695
   *                        return $value;
1696
   *                        }
1697
   *                        }
1698
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1699
   *                        </code>
1700
   *                        </p>
1701
   *
1702
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1703
   * @since 5.2.0
1704
   */
1705 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied a third argument,
    // so the native function keeps applying its own default otherwise.
    $filtered = func_num_args() >= 3
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // Post-process the native result with self::filter() before handing it back.
    return self::filter($filtered);
  }
1715
1716
  /**
1717
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1718
   *
1719
   * Gets multiple variables and optionally filters them
1720
   *
1721
   * @link  http://php.net/manual/en/function.filter-var-array.php
1722
   *
1723
   * @param array $data       <p>
1724
   *                          An array with string keys containing the data to filter.
1725
   *                          </p>
1726
   * @param mixed $definition [optional] <p>
1727
   *                          An array defining the arguments. A valid key is a string
1728
   *                          containing a variable name and a valid value is either a
1729
   *                          filter type, or an
1730
   *                          array optionally specifying the filter, flags and options.
1731
   *                          If the value is an array, valid keys are filter
1732
   *                          which specifies the filter type,
1733
   *                          flags which specifies any flags that apply to the
1734
   *                          filter, and options which specifies any options that
1735
   *                          apply to the filter. See the example below for a better understanding.
1736
   *                          </p>
1737
   *                          <p>
1738
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1739
   *                          input array are filtered by this filter.
1740
   *                          </p>
1741
   * @param bool  $add_empty  [optional] <p>
1742
   *                          Add missing keys as <b>NULL</b> to the return value.
1743
   *                          </p>
1744
   *
1745
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1746
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1747
   * the variable is not set.
1748
   * @since 5.2.0
1749
   */
1750 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a definition;
    // otherwise definition and add_empty are passed through together.
    $filtered = func_num_args() >= 2
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    // Post-process the native result with self::filter() before handing it back.
    return self::filter($filtered);
  }
1760
1761
  /**
1762
   * Check if the number of unicode characters is not more than the specified integer.
1763
   *
1764
   * @param string $str      The original string to be checked.
1765
   * @param int    $box_size The size in number of chars to be checked against string.
1766
   *
1767
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1768
   */
1769
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by this class' own strlen() implementation.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1773
1774 1
  /**
1775
   * Try to fix simple broken UTF-8 strings.
1776
   *
1777
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1778
   *
1779
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1780
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1781
   * See: http://en.wikipedia.org/wiki/Windows-1252
1782
   *
1783
   * @param string $str <p>The input string</p>
1784
   *
1785
   * @return string
1786 1
   */
1787 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // Search/replace lists derived once from self::$BROKEN_UTF8_FIX and then
    // reused by every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1806
1807
  /**
1808
   * Fix a double (or multiple) encoded UTF8 string.
1809
   *
1810 1
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1811
   *
1812 1
   * @return mixed
1813
   */
1814
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element (recursively), keeping their keys.
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Repeat the decode / re-encode round trip until a pass no longer changes
    // the value, i.e. until every extra encoding layer has been removed.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;

      $decoded = self::utf8_decode($previous);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1838 1
1839
  /**
1840 2
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1841 2
   *
1842 2
   * @param string $char
1843
   *
1844 2
   * @return string <p>'RTL' or 'LTR'</p>
1845
   */
1846
  public static function getCharDirection($char)
  {
    // Fallback lookup table of code points reported as right-to-left.
    // A plain integer is one single code point (compared strictly), a
    // two-element array is an inclusive "first, last" range.
    static $rtlTable = array(
        0x5be,
        0x5c0,
        0x5c3,
        0x5c6,
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        0x608,
        0x60b,
        0x60d,
        0x61b,
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        0x710,
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        0x7b1,
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        0x7fa,
        array(0x800, 0x815),
        0x81a,
        0x824,
        0x828,
        array(0x830, 0x83e),
        array(0x840, 0x858),
        0x85e,
        0x200f,
        0xfb1d,
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        0xfb3e,
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        0x10808,
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        0x1083c,
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        0x1093f,
        0x10a00,
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $icuDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class, grouped into the two results
      if (in_array($icuDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($icuDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // any other direction code falls through to the table lookup below
    }

    $c = static::chr_to_decimal($char);

    // everything outside 0x5be..0x10b7f is reported as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    foreach ($rtlTable as $entry) {
      if (is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1958
1959
  /**
1960 4
   * get data from "/data/*.php"
1961
   *
1962
   * @param string $file
1963 9
   *
1964
   * @return bool|string|array|int <p>Will return false on error.</p>
1965 9
   */
1966 9
  private static function getData($file)
  {
    // NOTE: the variable keeps its name on purpose - a required file runs in
    // this method's local scope and can therefore see $file.
    $file = __DIR__ . '/data/' . $file . '.php';

    // A missing data file is reported as false.
    if (!file_exists($file)) {
      return false;
    }

    // The result is whatever the data file itself returns.
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1976
1977 9
  /**
1978
   * alias for "UTF8::string_has_bom()"
1979
   *
1980 9
   * @see UTF8::string_has_bom()
1981 9
   *
1982 9
   * @param string $str
1983
   *
1984 9
   * @return bool
1985
   *
1986 9
   * @deprecated
1987
   */
1988 9
  public static function hasBom($str)
  {
    // Deprecated alias: the check itself is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1992
1993
  /**
1994
   * Converts a hexadecimal-value into an UTF-8 character.
1995
   *
1996
   * @param string $hexdec <p>The hexadecimal value.</p>
1997
   *
1998
   * @return string|false <p>One single UTF-8 character.</p>
1999
   */
2000
  public static function hex_to_chr($hexdec)
  {
    // Hexadecimal string -> decimal code point -> character.
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2004
2005
  /**
2006
   * Converts hexadecimal U+xxxx code point representation to integer.
2007
   *
2008
   * INFO: opposite to UTF8::int_to_hex()
2009
   *
2010
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
2011
   *
2012
   * @return int|false <p>The code point, or false on failure.</p>
2013
   */
2014
  public static function hex_to_int($hexdec)
  {
    $hexdec = (string)$hexdec;

    if ($hexdec === '') {
      return false;
    }

    // Accepted form: an optional "\u" or "U+" prefix followed by 4 to 6
    // hexadecimal digits (case-insensitive).
    //
    // Only real hex digits are allowed: the former "[a-z0-9]" class also matched
    // input such as "U+12zz", for which intval(..., 16) stops at the first
    // non-hex character and returns a bogus code point instead of false.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2028
2029
  /**
2030
   * alias for "UTF8::html_entity_decode()"
2031
   *
2032
   * @see UTF8::html_entity_decode()
2033
   *
2034
   * @param string $str
2035
   * @param int    $flags
2036
   * @param string $encoding
2037
   *
2038
   * @return string
2039
   */
2040
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Alias: all arguments are passed through unchanged.
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2044
2045
  /**
2046
   * Converts a UTF-8 string to a series of HTML numbered entities.
2047
   *
2048
   * INFO: opposite to UTF8::html_decode()
2049
   *
2050
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2051
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2052
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2053
   *
2054
   * @return string <p>HTML numbered entities.</p>
2055
   */
2056
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the encoded range starts at 0x80, so 0x00-0x7f stay untouched.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map is a flat list of 4-tuples:
      //   first code point, last code point, offset, mask.
      // It must therefore have a multiple of four elements; PHP >= 8.0 rejects
      // anything else, so the stray fifth element that used to be passed here
      // was removed.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2094 2
2095
  /**
2096 2
   * UTF-8 version of html_entity_decode()
2097 1
   *
2098 1
   * The reason we are not using html_entity_decode() by itself is because
2099
   * while it is not technically correct to leave out the semicolon
2100 2
   * at the end of an entity most browsers will still interpret the entity
2101
   * correctly. html_entity_decode() does not convert entities without
2102 2
   * semicolons, so we are left with our own little solution here. Bummer.
2103 1
   *
2104
   * Convert all HTML entities to their applicable characters
2105
   *
2106 2
   * INFO: opposite to UTF8::html_encode()
2107 2
   *
2108 2
   * @link http://php.net/manual/en/function.html-entity-decode.php
2109 2
   *
2110 2
   * @param string $str      <p>
2111 1
   *                         The input string.
2112
   *                         </p>
2113 1
   * @param int    $flags    [optional] <p>
2114 1
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2115 1
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2116 1
   *                         <table>
2117 1
   *                         Available <i>flags</i> constants
2118 2
   *                         <tr valign="top">
2119
   *                         <td>Constant Name</td>
2120 2
   *                         <td>Description</td>
2121
   *                         </tr>
2122
   *                         <tr valign="top">
2123
   *                         <td><b>ENT_COMPAT</b></td>
2124
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2125
   *                         </tr>
2126
   *                         <tr valign="top">
2127
   *                         <td><b>ENT_QUOTES</b></td>
2128
   *                         <td>Will convert both double and single quotes.</td>
2129
   *                         </tr>
2130
   *                         <tr valign="top">
2131
   *                         <td><b>ENT_NOQUOTES</b></td>
2132
   *                         <td>Will leave both double and single quotes unconverted.</td>
2133
   *                         </tr>
2134
   *                         <tr valign="top">
2135
   *                         <td><b>ENT_HTML401</b></td>
2136
   *                         <td>
2137
   *                         Handle code as HTML 4.01.
2138
   *                         </td>
2139
   *                         </tr>
2140
   *                         <tr valign="top">
2141
   *                         <td><b>ENT_XML1</b></td>
2142
   *                         <td>
2143
   *                         Handle code as XML 1.
2144
   *                         </td>
2145
   *                         </tr>
2146
   *                         <tr valign="top">
2147
   *                         <td><b>ENT_XHTML</b></td>
2148
   *                         <td>
2149
   *                         Handle code as XHTML.
2150
   *                         </td>
2151
   *                         </tr>
2152
   *                         <tr valign="top">
2153
   *                         <td><b>ENT_HTML5</b></td>
2154
   *                         <td>
2155
   *                         Handle code as HTML 5.
2156
   *                         </td>
2157
   *                         </tr>
2158
   *                         </table>
2159
   *                         </p>
2160
   * @param string $encoding [optional] <p>Encoding to use.</p>
2161
   *
2162
   * @return string <p>The decoded string.</p>
2163
   */
2164
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Fewer than four bytes: returned as-is (examples: "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Without an ampersand there is nothing to decode.
    if (strpos($str, '&') === false) {
      return $str;
    }

    // Neither a numeric entity start nor any terminating semicolon.
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // Converts one complete decimal entity; an entity that decodes to a quote
    // character is returned untouched and left to the native decoder below,
    // which is the one that receives $flags.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // Repeat until a pass leaves the string unchanged, so entities that only
    // become visible after one round of decoding are decoded as well.
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // append the missing ";" to numeric entities that lack one ...
      $terminated = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      // ... and let the native function decode whatever is left
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2229
2230
  /**
2231
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2232 1
   *
2233
   * @link http://php.net/manual/en/function.htmlentities.php
2234 1
   *
2235
   * @param string $str           <p>
2236
   *                              The input string.
2237
   *                              </p>
2238 1
   * @param int    $flags         [optional] <p>
2239
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2240
   *                              invalid code unit sequences and the used document type. The default is
2241
   *                              ENT_COMPAT | ENT_HTML401.
2242
   *                              <table>
2243
   *                              Available <i>flags</i> constants
2244
   *                              <tr valign="top">
2245
   *                              <td>Constant Name</td>
2246 1
   *                              <td>Description</td>
2247
   *                              </tr>
2248 1
   *                              <tr valign="top">
2249
   *                              <td><b>ENT_COMPAT</b></td>
2250
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2251
   *                              </tr>
2252
   *                              <tr valign="top">
2253
   *                              <td><b>ENT_QUOTES</b></td>
2254
   *                              <td>Will convert both double and single quotes.</td>
2255
   *                              </tr>
2256
   *                              <tr valign="top">
2257
   *                              <td><b>ENT_NOQUOTES</b></td>
2258
   *                              <td>Will leave both double and single quotes unconverted.</td>
2259
   *                              </tr>
2260
   *                              <tr valign="top">
2261 3
   *                              <td><b>ENT_IGNORE</b></td>
2262
   *                              <td>
2263 3
   *                              Silently discard invalid code unit sequences instead of returning
2264 3
   *                              an empty string. Using this flag is discouraged as it
2265
   *                              may have security implications.
2266 3
   *                              </td>
2267
   *                              </tr>
2268 3
   *                              <tr valign="top">
2269
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2270
   *                              <td>
2271
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2272
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2273
   *                              </td>
2274
   *                              </tr>
2275
   *                              <tr valign="top">
2276
   *                              <td><b>ENT_DISALLOWED</b></td>
2277
   *                              <td>
2278
   *                              Replace invalid code points for the given document type with a
2279 1
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2280
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2281 1
   *                              instance, to ensure the well-formedness of XML documents with
2282
   *                              embedded external content.
2283
   *                              </td>
2284
   *                              </tr>
2285
   *                              <tr valign="top">
2286
   *                              <td><b>ENT_HTML401</b></td>
2287
   *                              <td>
2288
   *                              Handle code as HTML 4.01.
2289 2
   *                              </td>
2290
   *                              </tr>
2291 2
   *                              <tr valign="top">
2292
   *                              <td><b>ENT_XML1</b></td>
2293
   *                              <td>
2294
   *                              Handle code as XML 1.
2295
   *                              </td>
2296
   *                              </tr>
2297
   *                              <tr valign="top">
2298
   *                              <td><b>ENT_XHTML</b></td>
2299
   *                              <td>
2300
   *                              Handle code as XHTML.
2301
   *                              </td>
2302
   *                              </tr>
2303 2
   *                              <tr valign="top">
2304
   *                              <td><b>ENT_HTML5</b></td>
2305 2
   *                              <td>
2306
   *                              Handle code as HTML 5.
2307
   *                              </td>
2308
   *                              </tr>
2309
   *                              </table>
2310
   *                              </p>
2311
   * @param string $encoding      [optional] <p>
2312
   *                              Like <b>htmlspecialchars</b>,
2313
   *                              <b>htmlentities</b> takes an optional third argument
2314
   *                              <i>encoding</i> which defines encoding used in
2315
   *                              conversion.
2316
   *                              Although this argument is technically optional, you are highly
2317 1
   *                              encouraged to specify the correct value for your code.
2318
   *                              </p>
2319 1
   * @param bool   $double_encode [optional] <p>
2320
   *                              When <i>double_encode</i> is turned off PHP will not
2321
   *                              encode existing html entities. The default is to convert everything.
2322
   *                              </p>
2323
   *
2324
   *
2325
   * @return string the encoded string.
2326
   * </p>
2327
   * <p>
2328
   * If the input <i>string</i> contains an invalid code unit
2329
   * sequence within the given <i>encoding</i> an empty string
2330
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2331
   * <b>ENT_SUBSTITUTE</b> flags are set.
2332
   */
2333
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below is applied to UTF-8 output only.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes that the native
    // function left in place, together with its numbered-entity replacement.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);

      // each distinct character is encoded only once
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2361 1
2362
  /**
2363
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2364
   *
2365
   * INFO: Take a look at "UTF8::htmlentities()"
2366
   *
2367
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2368
   *
2369
   * @param string $str           <p>
2370
   *                              The string being converted.
2371
   *                              </p>
2372
   * @param int    $flags         [optional] <p>
2373
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2374
   *                              invalid code unit sequences and the used document type. The default is
2375
   *                              ENT_COMPAT | ENT_HTML401.
2376
   *                              <table>
2377
   *                              Available <i>flags</i> constants
2378
   *                              <tr valign="top">
2379
   *                              <td>Constant Name</td>
2380
   *                              <td>Description</td>
2381
   *                              </tr>
2382
   *                              <tr valign="top">
2383
   *                              <td><b>ENT_COMPAT</b></td>
2384
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2385
   *                              </tr>
2386
   *                              <tr valign="top">
2387 1
   *                              <td><b>ENT_QUOTES</b></td>
2388
   *                              <td>Will convert both double and single quotes.</td>
2389 1
   *                              </tr>
2390
   *                              <tr valign="top">
2391
   *                              <td><b>ENT_NOQUOTES</b></td>
2392
   *                              <td>Will leave both double and single quotes unconverted.</td>
2393
   *                              </tr>
2394
   *                              <tr valign="top">
2395
   *                              <td><b>ENT_IGNORE</b></td>
2396
   *                              <td>
2397
   *                              Silently discard invalid code unit sequences instead of returning
2398
   *                              an empty string. Using this flag is discouraged as it
2399
   *                              may have security implications.
2400
   *                              </td>
2401 1
   *                              </tr>
2402
   *                              <tr valign="top">
2403 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2404
   *                              <td>
2405
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2406
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2407
   *                              </td>
2408
   *                              </tr>
2409
   *                              <tr valign="top">
2410
   *                              <td><b>ENT_DISALLOWED</b></td>
2411
   *                              <td>
2412
   *                              Replace invalid code points for the given document type with a
2413
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2414
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2415
   *                              instance, to ensure the well-formedness of XML documents with
2416 16
   *                              embedded external content.
2417
   *                              </td>
2418 16
   *                              </tr>
2419
   *                              <tr valign="top">
2420
   *                              <td><b>ENT_HTML401</b></td>
2421
   *                              <td>
2422
   *                              Handle code as HTML 4.01.
2423
   *                              </td>
2424
   *                              </tr>
2425
   *                              <tr valign="top">
2426
   *                              <td><b>ENT_XML1</b></td>
2427
   *                              <td>
2428
   *                              Handle code as XML 1.
2429
   *                              </td>
2430
   *                              </tr>
2431 28
   *                              <tr valign="top">
2432
   *                              <td><b>ENT_XHTML</b></td>
2433 28
   *                              <td>
2434
   *                              Handle code as XHTML.
2435 28
   *                              </td>
2436 5
   *                              </tr>
2437
   *                              <tr valign="top">
2438
   *                              <td><b>ENT_HTML5</b></td>
2439 28
   *                              <td>
2440
   *                              Handle code as HTML 5.
2441
   *                              </td>
2442
   *                              </tr>
2443
   *                              </table>
2444
   *                              </p>
2445
   * @param string $encoding      [optional] <p>
2446
   *                              Defines encoding used in conversion.
2447
   *                              </p>
2448
   *                              <p>
2449 1
   *                              For the purposes of this function, the encodings
2450
   *                              ISO-8859-1, ISO-8859-15,
2451 1
   *                              UTF-8, cp866,
2452
   *                              cp1251, cp1252, and
2453 1
   *                              KOI8-R are effectively equivalent, provided the
2454 1
   *                              <i>string</i> itself is valid for the encoding, as
2455
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2456
   *                              the same positions in all of these encodings.
2457 1
   *                              </p>
2458 1
   * @param bool   $double_encode [optional] <p>
2459
   *                              When <i>double_encode</i> is turned off PHP will not
2460 1
   *                              encode existing html entities, the default is to convert everything.
2461
   *                              </p>
2462
   *
2463
   * @return string The converted string.
2464
   * </p>
2465
   * <p>
2466
   * If the input <i>string</i> contains an invalid code unit
2467
   * sequence within the given <i>encoding</i> an empty string
2468
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2469
   * <b>ENT_SUBSTITUTE</b> flags are set.
2470
   */
2471 16
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Only a non-default encoding needs to be mapped to a normalized name
    // (falling back to 'UTF-8') before it is handed to the native function.
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2479 16
2480 16
  /**
   * Checks whether iconv is available on the server.
   *
   * Side effect: when the extension is loaded and Bootup::is_php('5.6') reports
   * false (presumably a PHP version older than 5.6 - verify against Bootup),
   * the iconv input-, output- and internal-encoding are set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // iconv_set_encoding() only exists when the extension is loaded, so the
    // legacy setup must never run without it (it would be a fatal error).
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2500
2501
  /**
   * Alias of "UTF8::decimal_to_chr()": forwards the integer unchanged and
   * returns whatever that method returns.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param int $int <p>The value handed to decimal_to_chr().</p>
   *
   * @return string
   */
  public static function int_to_chr($int)
  {
    return self::decimal_to_chr($int);
  }
2514
2515
  /**
   * Converts an integer to its hexadecimal "U+xxxx" code point representation.
   *
   * The hexadecimal digits are lower-case and left-padded with zeros to at
   * least four digits.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the digits.</p>
   *
   * @return string <p>The code point, or an empty string if $int is not of type int.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // only real integers are accepted (numeric strings, floats, ... are not)
    if ((int)$int !== $int) {
      return '';
    }

    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2537
2538 1
  /**
   * Checks whether intl-char is available on the server.
   *
   * Both conditions must hold: Bootup::is_php('7.0') is true and the class
   * "IntlChar" exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2551
2552 1
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    return extension_loaded('intl') === true;
  }
2561
2562
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *
   * @deprecated use UTF8::is_ascii() instead
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2577
2578 1
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated use UTF8::is_base64() instead
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2593 4
2594
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>The input to check.</p>
   *
   * @return bool
   *
   * @deprecated use UTF8::is_binary() instead
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2609 2
2610 4
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string to compare with the known Byte Order Marks.</p>
   *
   * @return bool
   *
   * @deprecated use UTF8::is_bom() instead
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2625 4
2626 4
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated use UTF8::is_html() instead
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2641
2642
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated use UTF8::is_json() instead
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2657 3
2658
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated use UTF8::is_utf16() instead
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2673
2674 3
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated use UTF8::is_utf32() instead
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2689 3
2690 1
  /**
   * Deprecated camelCase alias: forwards both arguments to "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Passed through unchanged to is_utf8().</p>
   *
   * @return bool
   *
   * @deprecated use UTF8::is_utf8() instead
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2706
2707
  /**
   * Checks if a string is 7 bit ASCII, i.e. contains no byte in the range 0x80-0xFF.
   *
   * The input is cast to string first; an empty string counts as ASCII.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // empty input short-circuits; otherwise search for any byte with the high bit set
    return $str === '' || !preg_match('/[\x80-\xFF]/', $str);
  }
2727
2728
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and must re-encode to exactly the
   * same string, so non-canonical input (e.g. missing padding) is rejected.
   * An empty string is never considered base64.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = base64_decode($str, true);

    // Compare against false explicitly: a truthiness test would wrongly reject
    // valid input whose decoded payload is the string "0" (e.g. "MA==").
    if ($decoded === false) {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2750
2751
  /**
   * Check if the input is binary... (this is a heuristic and looks like a hack).
   *
   * The input is cast to string and is reported as binary when
   * - it consists only of the characters "0" and "1", or
   * - it contains at least one NUL byte.
   *
   * An empty string is never binary.
   *
   * @param mixed $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    if ($input === '') {
      return false;
    }

    // a textual bit-string like "0101"
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // A single NUL byte is enough. The former "more than 30% NUL bytes" test
    // could never decide anything this check does not, so it was dropped.
    return strpos($input, "\x00") !== false;
  }
2781
2782 3
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to
   * UTF8::is_binary(). If the file cannot be opened or read, an empty block
   * is checked instead.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file)
  {
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure with false; never pass that to fread()/fclose()
      if ($fp !== false) {
        try {
          $read = fread($fp, 512);
          if ($read !== false) {
            $block = $read;
          }
        } finally {
          // always release the handle, even if reading failed
          fclose($fp);
        }
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    }

    return self::is_binary($block);
  }
2801 36
2802
  /**
   * Checks if the given string is identical to one of the keys of self::$BOM,
   * i.e. to any known "Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // strict comparison against every BOM string (the array keys)
    return in_array($str, array_keys(self::$BOM), true);
  }
2821
2822 33
  /**
   * Check if the string contains any html-tags <lall>.
   *
   * The input is cast to string; an empty string never contains a tag.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // opening or closing tag, optionally with attributes and / or self-closing
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    return preg_match($tagPattern, $str) === 1;
  }
2848
2849
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that UTF8::json_decode() turns into an object or an array,
   * without a JSON error, is accepted; an empty string is rejected.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // scalars and null are not treated as JSON documents here
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    return json_last_error() === JSON_ERROR_NONE;
  }
2880
2881
  /**
   * Check if the string is UTF-16.
   *
   * After removing a BOM, only input that UTF8::is_binary() reports as binary
   * is examined. For each byte order the input is converted to UTF-8 and
   * round-tripped; if the round-trip is stable, the characters of the result
   * that also show up in the original input are counted. The byte order with
   * the higher count wins; a tie means "not UTF-16".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    // score per byte order, evaluated in the order LE, BE
    $hits = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      $backToSource = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToSource, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $hits[$encoding]++;
        }
      }
    }

    if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
      return false;
    }

    return ($hits['UTF-16LE'] > $hits['UTF-16BE']) ? 1 : 2;
  }
2940
2941
  /**
   * Check if the string is UTF-32.
   *
   * After removing a BOM, only input that UTF8::is_binary() reports as binary
   * is examined. For each byte order the input is converted to UTF-8 and
   * round-tripped; if the round-trip is stable, the characters of the result
   * that also show up in the original input are counted. The byte order with
   * the higher count wins; a tie means "not UTF-32".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    // score per byte order, evaluated in the order LE, BE
    $hits = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      $backToSource = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToSource, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $hits[$encoding]++;
        }
      }
    }

    if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
      return false;
    }

    return ($hits['UTF-32LE'] > $hits['UTF-32BE']) ? 1 : 2;
  }
3000
3001
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * Rejected are: stray continuation octets, lead octets of 5/6-octet
   * sequences, non-shortest forms, surrogates (U+D800-U+DFFF), code points
   * above U+10FFFF and sequences that are cut off at the end of the input.
   * An empty string is valid.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut relies on the /u modifier but is taken when
    // pcre_utf8_support() is NOT true - the condition looks inverted. Left as
    // is because pcre_utf8_support() is not visible here; please confirm.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current character
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Expect either a US-ASCII character or the lead octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } else {
          // Stray continuation octet, 0xFE / 0xFF, or the lead octet of a
          // 5/6-octet sequence. The latter can never be valid (not shortest
          // form or outside 0-0x10FFFF), so it is rejected right away.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only continuation octets are legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the multi-octet sequence: $mUcs4 holds the code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset for the next character
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A sequence that is still open here was cut off by the end of the
    // input (e.g. a lone "\xC3") and is therefore not valid UTF-8.
    return $mState === 0;
  }
3145
3146
  /**
3147
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3148
   * Decodes a JSON string
3149
   *
3150 18
   * @link http://php.net/manual/en/function.json-decode.php
3151
   *
3152 18
   * @param string $json    <p>
3153 18
   *                        The <i>json</i> string being decoded.
3154
   *                        </p>
3155 18
   *                        <p>
3156
   *                        This function only works with UTF-8 encoded strings.
3157 18
   *                        </p>
3158
   *                        <p>PHP implements a superset of
3159 2
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3160
   *                        only supports these values when they are nested inside an array or an object.
3161 2
   *                        </p>
3162
   * @param bool   $assoc   [optional] <p>
3163 1
   *                        When <b>TRUE</b>, returned objects will be converted into
3164 1
   *                        associative arrays.
3165
   *                        </p>
3166 2
   * @param int    $depth   [optional] <p>
3167 2
   *                        User specified recursion depth.
3168
   *                        </p>
3169 18
   * @param int    $options [optional] <p>
3170 18
   *                        Bitmask of JSON decode options. Currently only
3171 1
   *                        <b>JSON_BIGINT_AS_STRING</b>
3172 1
   *                        is supported (default is to cast large integers as floats)
3173
   *                        </p>
3174 18
   *
3175 18
   * @return mixed the value encoded in <i>json</i> in appropriate
3176
   * PHP type. Values true, false and
3177 18
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3178
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3179
   * <i>json</i> cannot be decoded or if the encoded
3180
   * data is deeper than the recursion limit.
3181
   */
3182 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // run the input through self::filter() and force it to a string
    $filtered = (string)self::filter($json);

    // "$options" is only passed on when Bootup::is_php('5.4') is true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3194
3195
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through self::filter() before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.</p>
   * @param int   $options [optional] <p>Bitmask of JSON encode options, e.g.
   *                       <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.</p>
   * @param int   $depth   [optional] <p>Maximum depth, must be greater than zero. Only
   *                       forwarded when Bootup::is_php('5.5') is true.</p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Without the 5.5 feature level the native function is called without "$depth".
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3243
3244
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string; an empty string for empty input.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // Same empty-input guard as the other string helpers in this class.
    if (!isset($str[0])) {
      return '';
    }

    // self::substr() may return false; cast so that self::strtolower() and the
    // concatenation below always work on strings.
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3255 9
3256
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped. When omitted (or empty),
   *                      Unicode separator (\pZ) and "other"/control (\pC) characters
   *                      are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" (or int 0) is a legitimate character list, e.g. ltrim('007', '0').
    // Every other falsy value still means "no character list given".
    $hasCharList = $chars !== INF && ($chars || $chars === '0' || $chars === 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($hasCharList === false) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3279
3280
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point.</p>
   */
  public static function max($arg)
  {
    // An array of strings is joined into one string before it is inspected.
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;

    $codePoints = self::codepoints($haystack);

    return self::chr(max($codePoints));
  }
3295 1
3296
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Largest byte length among the characters, 0 if there are none.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3313
3314
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding
   * is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3329
3330
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point.</p>
   */
  public static function min($arg)
  {
    // An array of strings is joined into one string before it is inspected.
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;

    $codePoints = self::codepoints($haystack);

    return self::chr(min($codePoints));
  }
3345
3346
  /**
   * Alias for "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3362
3363
  /**
   * Normalize the encoding-"name" input.
   *
   * Results of the alias lookup are memoized per original input in a static cache.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>Returned as-is when $encoding is empty, e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $resolved = array();

    if (!$encoding) {
      return $fallback;
    }

    // Fast paths: the canonical UTF-8 name and names listed in self::$ICONV_ENCODING
    // are returned untouched.
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($resolved[$encoding])) {
      return $resolved[$encoding];
    }

    $upper = strtoupper($encoding);

    // Lookup key: upper-cased name without anything but letters, digits and whitespace.
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // Unknown names fall back to the upper-cased input.
    $result = isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upper;

    $resolved[$encoding] = $result;

    return $result;
  }
3420
3421
  /**
   * Normalize some MS Word special characters.
   *
   * Replaces every key of self::$UTF8_MSWORD with its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // Search / replace lists are derived from the mapping table once per request.
    static $search = null;
    static $replace = null;

    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$UTF8_MSWORD);
      $replace = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($search, $replace, $text);
  }
3447
3448
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE is replaced by a plain space; unless
   * requested otherwise, the entries of self::$BIDI_UNI_CODE_CONTROLS_TABLE are
   * removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaceLists = array();
    static $bidiControls = null;

    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    // One prepared list per "keep non-breaking space" variant.
    $variant = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceLists[$variant])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceLists[$variant] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $text = str_replace($bidiControls, '', $text);
    }

    return str_replace($whitespaceLists[$variant], ' ', $text);
  }
3494
3495
  /**
   * Format a number with grouped thousands.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;
    $decimals = (int)$decimals;

    if (
        isset($thousands_sep[1], $dec_point[1])
        &&
        Bootup::is_php('5.4') === true
    ) {
      // Both separators are longer than one byte: format with the ASCII defaults
      // and swap them afterwards. strtr() substitutes in a single pass, so a
      // separator that itself contains "." or "," is not replaced a second time
      // (which a sequential str_replace() would do).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3533 18
3534
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>Unicode code point of the given character.</p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // Results of the manual decoder, keyed by the (UTF-8) input.
    static $codePointCache = array();

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // Still not UTF-8 after normalization: convert the input first.
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $viaIntl = \IntlChar::ord($chr);
      // A falsy result (e.g. null or 0) falls through to the manual decoder.
      if ($viaIntl) {
        return $viaIntl;
      }
    }

    if (isset($codePointCache[$chr]) === true) {
      return $codePointCache[$chr];
    }

    // Decode by hand from at most the first four bytes (1-based array from unpack()).
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode): the value of the first byte, 0 for empty input
      $codePoint = $lead;
    }

    $codePointCache[$chr] = $codePoint;

    return $codePoint;
  }
3596 36
3597
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope;
   *          the result is only available through the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or $result stays empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    $parsed = \mb_parse_str($input, $result);

    return $parsed !== false && !empty($result);
  }
3624
3625
  /**
   * Checks if the "u" pattern modifier is available that enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // The empty pattern matches the empty subject (result 1) when the modifier is
    // accepted; the warning raised otherwise is deliberately silenced.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return $matched === 1;
  }
3635
3636
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: all-digit values are taken as
   * decimal, hexadecimal-looking values go through self::hex_to_int(), anything
   * else through self::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range; empty if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    // Resolve start first, then end; bail out as soon as one of them is unusable.
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3682
3683 2
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten to hexadecimal HTML entities so the
    // entity decoder below can resolve them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $text)) {
      $text = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($text));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Decode once, or - with $multi_decode - until a pass no longer changes anything.
    do {
      $before = $text;

      $asUtf8 = self::to_utf8($text);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $text = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $text);

    return (string)$text;
  }
3733 6
3734 6
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3750
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM bytes => byte length) is checked in turn
   * against the start of the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    foreach (self::$BOM as $bom => $byteLength) {
      if (strpos($text, $bom) !== 0) {
        continue;
      }

      $text = substr($text, $byteLength);
    }

    return $text;
  }
3774
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of consecutive repetitions of an item is collapsed into a single
   * occurrence of that item.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // A callback is used so that the item is inserted literally: as a plain
        // preg_replace() replacement, items containing "$0", "\0" or "\\" would be
        // interpreted as backreferences / escapes.
        $str = preg_replace_callback(
            '/(' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3798
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str
   * @param bool   $url_encoded <p>Also remove the URL-encoded ("%XX") form of these characters.</p>
   * @param string $replacement <p>Text that is put in place of every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // Removed: every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09).
    if ($url_encoded) {
      // Hex digits in percent-encoding are case-insensitive ("%0b" === "%0B"),
      // so the patterns must be as well; "%7f" mirrors the raw \x7F below.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is replaced anymore: a removal can join the
    // surrounding characters into a new match (e.g. "%%000").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3831
3832 1
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 chars.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // An empty replacement is expressed as "none" for mb_substitute_character().
      $substitute = $replacementChar === '' ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // Temporarily switch the substitute character, re-encode UTF-8 => UTF-8
      // and restore the previous setting afterwards.
      $previous = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $text = \mb_convert_encoding($text, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previous);
    }

    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replacements = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($needles, $replacements, $text);
  }
3882 2
3883 2
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped. When omitted (or empty),
   *                      Unicode separator (\pZ) and "other"/control (\pC) characters
   *                      are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" (or int 0) is a legitimate character list, e.g. rtrim('100', '0').
    // Every other falsy value still means "no character list given".
    $hasCharList = $chars !== INF && ($chars || $chars === '0' || $chars === 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($hasCharList === false) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
3906
3907
  /**
   * Builds a regex fragment (a character class, or an alternation group) that
   * matches any of the characters of the given string. Results are memoized.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>Regex fragment without delimiters.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $compiled = array();

    $cacheKey = $s . $class;

    if (isset($compiled[$cacheKey])) {
      return $compiled[$cacheKey];
    }

    // $parts[0] collects the content of the bracket expression, every further
    // entry becomes its own alternative.
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen is moved to the front of the class
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // at most two bytes long: quote it for use inside the pattern
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // longer in bytes, but still a single character
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts))
        ? $parts[0]
        : '(?:' . implode('|', $parts) . ')';

    $compiled[$cacheKey] = $pattern;

    return $pattern;
  }
3955
3956
  /**
   * WARNING: Echoes the detected native UTF-8 support flags, e.g. for debugging.
   *
   * Every entry of the internal support table is printed on its own line,
   * followed by a "<br>" tag.
   */
  public static function showSupport()
  {
    // run the feature detection once, if it has not happened yet
    $alreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $supportValue) {
      $line = $supportValue . "\n<br>";
      echo $line;
    }
  }
3969
3970
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is passed through untouched when the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encodings need to be normalized
    $usedEncoding = $encoding;
    if ($usedEncoding !== 'UTF-8') {
      $usedEncoding = self::normalize_encoding($usedEncoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $usedEncoding);

    return '&#' . $codePoint . ';';
  }
4002 21
4003
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via regex; otherwise a
   * byte-wise fallback walks the string and collects well-formed 1-4 byte
   * sequences (bytes that do not form such a sequence are skipped).
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // array_chunk() requires a real integer when strict_types is enabled
    $length = (int)$length;

    // init
    $ret = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: decode the UTF-8 byte structure by hand

      $len = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 0xxxxxxx: single byte (ASCII)
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 110xxxxx 10xxxxxx: two-byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // group the single characters into chunks of "$length" and glue each chunk together
      $ret = array_chunk($ret, $length);

      return array_map(
          function ($item) {
            return implode('', $item);
          }, $ret
      );
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4119
4120
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order: binary UTF-16/UTF-32 check, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally an
   * "iconv()" round-trip over the known iconv encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {
      // each detector is evaluated only once
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the string to itself leaves it unchanged.

    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // "iconv()" returns false on failure; comparing the strings directly avoids
      // hashing a non-string value (a TypeError with strict_types enabled).
      if ($converted === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
4212
4213
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4236
4237 2
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The substr() result is cast because static analysis reports it may be false;
    // strcasecmp() should always be handed a string.
    $tail = (string)self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4260 3
4261
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Implemented via "preg_replace()" with quoted search terms; replacement
   * values are inserted literally ("$1" or "\1" are not treated as back-references).
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), a string or an array.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build one pattern per search term (no by-reference loop, so no dangling reference).
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must never match: this pattern cannot match anything
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "preg_replace()" interprets "\" and "$" in the replacement; escape both so the
    // replacement text is used verbatim, like "str_ireplace()" does.
    $escapeReplacement = function ($value) {
      return strtr((string)$value, array('\\' => '\\\\', '$' => '\\$'));
    };

    if (is_array($replace)) {
      $replacement = array_map($escapeReplacement, $replace);
    } else {
      $replacement = $escapeReplacement($replace);
    }

    $replacedCount = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
4304
4305
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix exactly when its first match is at position 0
    $position = self::stripos($haystack, $needle);

    return $position === 0;
  }
4328
4329
  /**
   * Limit the number of characters in a string without cutting a word in half.
   *
   * Strings that fit into "$length" are returned unchanged. Longer strings are
   * cut at "$length", shortened back to the last space inside that part and
   * suffixed with "$strAddOn".
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters to keep.</p>
   * @param string $strAddOn <p>The suffix that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    $lastIndex = $length - 1;

    // the limit falls exactly on a space: drop the space and stop there
    if (self::substr($str, $lastIndex, 1) === ' ') {
      return self::substr($str, 0, $lastIndex) . $strAddOn;
    }

    // otherwise throw away the (possibly incomplete) last word
    $str = self::substr($str, 0, $length);
    $words = explode(' ', $str);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // nothing is left before the last word, so do a hard cut instead
    return self::substr($str, 0, $lastIndex) . $strAddOn;
  }
4369
4370
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when "$pad_length" is not an integer, is
   * not positive, is smaller than the string length, or when "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string cannot fill anything and would cause a division by zero below
    if (!$ps_length) {
      return $str;
    }

    // number of characters that have to be added
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // left side gets the rounded-down half, right side the remainder
        $leftLength = (int)($diff / 2);
        $rightLength = $diff - $leftLength;
        $repeats = (int)ceil($diff / $ps_length / 2);

        $pre = str_repeat($pad_string, $repeats);
        $pre = self::substr($pre, 0, $leftLength);
        $post = str_repeat($pad_string, $repeats);
        $post = self::substr($post, 0, $rightLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4424
4425
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
4449 2
4450
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function, "$count" is filled by reference
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4483
4484
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text that is inserted instead.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject, unchanged if the search term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    // swap exactly the characters covered by the first match
    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $position, $searchLength);
  }
4503 1
4504
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // shuffle on character level, not on byte level
    $characters = self::split($str);

    \shuffle($characters);

    return \implode('', $characters);
  }
4519
4520
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4545 14
4546
  /**
   * Split a string into an array.
   *
   * The string is split into grapheme clusters; for "$len" greater than 1,
   * every "$len" consecutive clusters are joined into one element.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of clusters per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // invalid lengths are handed to the native function, which reports the error
    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // every "$len"-th cluster opens a new chunk
        $chunkIndex++;
        $chunks[$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4590
4591
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix exactly when its first match is at position 0
    $position = self::strpos($haystack, $needle);

    return $position === 0;
  }
4614
4615
  /**
   * Get a binary representation of a specific string.
   *
   * Every byte is written as eight binary digits; leading zeros of the whole
   * result are removed and an empty (or all-zero) input yields "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1" characters.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Convert byte by byte: a single numeric base conversion of the whole string
    // loses precision as soon as the value no longer fits into a float.
    $bits = '';
    $byteCount = \strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // keep the historic output format: no leading zeros, but never an empty result
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4630
4631
  /**
   * Convert a string into an array of words.
   *
   * The string is split with captured delimiters, so the result alternates
   * between non-word parts (even indexes) and words (odd indexes).
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charlist <p>Additional characters that count as part of a word.</p>
   *
   * @return array <p>For an empty input an array with one empty string is returned.</p>
   */
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ($str === '') {
      return array('');
    }

    // letters (\pL) plus the additional characters form the "word character" class
    $wordChars = self::rxClass($charlist, '\pL');
    $pattern = "/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u";

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4651 3
4652
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4667
4668
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see "$format").</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the parts alternate: even index = text between words, odd index = word
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      $wordsByOffset = array();

      // the first word starts after the leading non-word part
      $offset = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $wordsByOffset[$offset] = $parts[$index];
        $offset += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $wordsByOffset;
    }

    return ($partCount - 1) / 2;
  }
4711
4712
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
   * case-folded before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4730
4731 11
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all arguments are forwarded unchanged
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4748
4749 10
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings compare as equal right away; otherwise both strings are
   * normalized to NFD before the byte-wise comparison. If normalization fails
   * for either string, the raw strings are compared instead.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // cast once: the normalizer and the native comparison both require strings
    $str1 = (string)$str1;
    $str2 = (string)$str2;

    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() returns false on failure (e.g. invalid UTF-8): fall back to the raw bytes
    if ($normalized1 === false || $normalized2 === false) {
      return \strcmp($str1, $str2);
    }

    return \strcmp($normalized1, $normalized2);
  }
4769 1
4770
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the segment.</p>
   * @param int    $offset   <p>The start position inside the string.</p>
   * @param int    $length   <p>The maximum number of characters to examine.</p>
   *
   * @return int|null <p>The segment length, or null for an empty mask or an empty string.</p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string when a non-default window was requested
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first character from the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no mask character found: the whole string is the segment
    return self::strlen($str);
  }
4802
4803
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all arguments are forwarded unchanged
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4820 2
4821 2
  /**
   * Build a string by converting each code point via UTF8::chr() and joining the results.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $chars = array();

    foreach ($array as $key => $codePoint) {
      $chars[$key] = self::chr($codePoint);
    }

    return implode('', $chars);
  }
4843
4844
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is tested against every key of the class-level BOM table.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // only the keys (the BOM byte sequences) matter here, not the mapped values
    foreach (array_keys(self::$BOM) as $bomString) {
      $position = strpos($str, $bomString);

      if ($position === 0) {
        return true;
      }
    }

    return false;
  }
4861 2
4862 2
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped; passed through to
   *                                the native strip_tags().
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string (empty string for empty input).</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4896
4897 11
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * Uses grapheme_stripos() when the encoding is UTF-8 and intl support is flagged,
   * otherwise mb_stripos().
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (or either string is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $wantsUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $wantsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding == 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4965
4966 16
  /**
   * Returns all of haystack starting from and including the first (case-insensitive)
   * occurrence of needle to the end.
   *
   * Tries mb_stristr(), then grapheme_stristr(), then a regex-based fallback,
   * depending on the detected extension support.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // without mbstring only UTF-8 is handled by the remaining code paths
    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // regex fallback: lazily capture everything in front of the first match
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/usi';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle
        ? $match[1]
        : self::substr($haystack, self::strlen($match[1]));
  }
5033 1
5034 1
  /**
   * Get the string length, not the byte-length!
   *
   * Tries, in order and depending on the detected extension support:
   * iconv (non UTF-8 without mbstring), mbstring, intl (grapheme), iconv,
   * a regex based count and finally the "mb_strlen" polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $wantsUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $wantsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    // for these encodings the native byte count is returned directly
    // (loose comparison kept on purpose, it mirrors the former "switch")
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $iconvLength = \iconv_strlen($str, $encoding);
        if ($iconvLength !== false) {
          return $iconvLength;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['intl'] === true) {
      $str = self::clean($str);
      $graphemeLength = \grapheme_strlen($str);
      if ($graphemeLength !== null) {
        return $graphemeLength;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $iconvLength = \iconv_strlen($str, $encoding);
      if ($iconvLength !== false) {
        return $iconvLength;
      }
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $parts);
    $matchCount = count($parts[0]);
    if ($matchCount !== 0) {
      return $matchCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
5130
5131
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strnatcmp().
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5147
5148
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed
   * through UTF8::strtonatfold() and compared with the native strnatcmp().
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5166 1
5167 1
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5184
5185 1
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to their first $len characters via UTF8::substr()
   * and the results are compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() may hand back false instead of a string (as the sibling
    // callers in this class already assume by casting); normalize that to ''
    // so UTF8::strcmp() always receives strings
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5205
5206
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (or if one of the arguments is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // find the first character that belongs to the list
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    $bytePosition = strpos($haystack, $m[0]);

    return substr($haystack, $bytePosition);
  }
5231 1
5232 1
  /**
   * Find position of first occurrence of string in a string.
   *
   * Tries, in order and depending on the detected extension support:
   * iconv (non UTF-8 without mbstring), mbstring, intl (grapheme), iconv
   * and finally a vanilla php fallback.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack  <p>The string being checked.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or one of the strings is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    // NOTE: the former "integer $needle" conversion was dropped here: $needle
    // is always a string at this point, so its strict int-check never matched.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn only if neither extension that understands other encodings is available
    // (same check as in UTF8::strlen())
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // translate "count from the end" into an absolute start position, so the
      // returned position refers to the whole haystack and not only to its tail
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    // UTF8::substr() may return false; treat that like an empty tail
    $tail = (string)self::substr($haystack, $offset);

    $pos = strpos($tail, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos is a byte position inside $tail: convert it into a character count
    return $offset + self::strlen(substr($tail, 0, $pos));
  }
5353
5354 1
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Normalizes the encoding name, optionally cleans both strings and delegates to mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = $encoding === 'UTF-8'
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5393 3
5394
  /**
   * Reverses characters order in the string.
   *
   * The string is split via UTF8::split(), the resulting list is reversed and re-joined.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5411
5412 1
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Normalizes the encoding name, optionally cleans both strings and delegates to mb_strrichr().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = $encoding === 'UTF-8'
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5450 1
5451
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * The lookup is delegated to "\mb_strripos" when mbstring is supported, then to "\grapheme_strripos" when
   * intl is supported; otherwise both strings are lower-cased and UTF8::strrpos() is used.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool       $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted to a string via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['intl'] === true) {
      $returnTmp = \grapheme_strripos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    //
    // Both operands are lower-cased so that the case-sensitive UTF8::strrpos() behaves case-insensitively.
    // The previous folding via strtonatfold() only removed combining marks: it did not fold case at all and
    // could shift character positions relative to the original haystack.
    $haystackLower = self::strtolower($haystack, $encoding);
    $needleLower = self::strtolower($needle, $encoding);

    return self::strrpos($haystackLower, $needleLower, $offset, $encoding, $cleanUtf8);
  }
5528 2
5529
  /**
   * Find position of last occurrence of a string in a string.
   *
   * The lookup is tried via "\mb_strrpos" (mbstring) and "\grapheme_strrpos" (intl) when supported; if neither
   * yields a position, a byte-based search on the (optionally sliced) haystack is used as a last resort.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool       $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted to a string via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['intl'] === true) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php

    // UTF8::substr() returns false when the start lies beyond the end of the string; the cast turns that
    // into an empty haystack so the native strrpos() below simply reports "not found" instead of being
    // called with a boolean (which is a type error under strict_types).
    if ($offset > 0) {
      $haystack = (string)self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = (string)self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    $pos = strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // native strrpos() yields a byte offset: measure the prefix with UTF8::strlen() and add the skipped offset
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5623 4
5624 4
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start of the window that is inspected, passed to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the window that is inspected, passed to UTF8::substr().</p>
   *
   * @return int
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // init
    $offset = (int)$offset;
    $length = (int)$length;

    // slice the input only when something other than the default window was requested
    if ($offset || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the longest leading run of mask characters
    $matches = array();
    $found = preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches);
    if (!$found) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5652
5653
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Tries "\mb_strstr" (mbstring) and "\grapheme_strstr" (intl) when supported and finally falls back to a
   * regular expression.
   *
   * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, strstr() returns the part of the
   *                              haystack before the first occurrence of the needle (excluding the needle).
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never produce a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // clean both operands before they are handed to the string functions below
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      $viaMbstring = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($viaMbstring !== false) {
        return $viaMbstring;
      }
    }

    if (self::$SUPPORT['intl'] === true) {
      $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($viaIntl !== false) {
        return $viaIntl;
      }
    }

    // regex fallback: lazily capture everything in front of the first occurrence of the needle
    $match = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle ? $match[1] : self::substr($haystack, self::strlen($match[1]));
  }
5725
5726
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $full      [optional] <p>
   *                          <b>true</b>, replace full case folding chars (default)<br />
   *                          <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
   *                          </p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // lookup tables are built once and then re-used by every later call
    static $COMMON_FOLD_SEARCH = null;
    static $COMMON_FOLD_REPLACE = null;
    static $FULL_FOLD_TABLE = null;

    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($COMMON_FOLD_SEARCH === null) {
      $COMMON_FOLD_SEARCH = array_keys(self::$COMMON_CASE_FOLD);
      $COMMON_FOLD_REPLACE = array_values(self::$COMMON_CASE_FOLD);
    }

    // 1. the small, built-in folding table
    $str = str_replace($COMMON_FOLD_SEARCH, $COMMON_FOLD_REPLACE, $str);

    // 2. the full folding table, loaded on first use
    if ($full) {
      if ($FULL_FOLD_TABLE === null) {
        $FULL_FOLD_TABLE = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($FULL_FOLD_TABLE[0], $FULL_FOLD_TABLE[1], $str);
    }

    // 3. optional clean-up, then lower-case the result
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5777
5778 7
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str       <p>The string being lowercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean the input before it is handed to the "mb_"-function
      $str = self::clean($str);
    }

    // every charset name other than the default one is normalized first
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return \mb_strtolower($str, $charset);
  }
5810
5811 1
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed afterwards.
   * Case is left untouched.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The folded string, or the unmodified text when normalization or the replacement fails.</p>
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $normalized = \Normalizer::normalize($str, \Normalizer::NFD);

    // normalize() reports failure with false; never pass that boolean on to preg_replace()
    if ($normalized === false) {
      return $str;
    }

    $stripped = preg_replace('/\p{Mn}+/u', '', $normalized);

    // preg_replace() yields null on a PCRE error, keep the normalized text in that case
    return $stripped === null ? $normalized : $stripped;
  }
5823
5824
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str       <p>The string being uppercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean the input before it is handed to the "mb_"-function
      $str = self::clean($str);
    }

    // every charset name other than the default one is normalized first
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return \mb_strtoupper($str, $charset);
  }
5855
5856
  /**
   * Translate characters or replace sub-strings.
   *
   * With a <b>$to</b> argument, <b>$from</b> and <b>$to</b> are paired up element by element (strings are
   * split via UTF8::str_split() first, arrays are used as they are); surplus elements of the longer side
   * are ignored. Without <b>$to</b>, <b>$from</b> is handed to "strtr" unchanged as replacement map.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // only strings need to be split, an array was previously (wrongly) passed to str_split() as well
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      // pair up as many elements as both sides provide, the rest of the longer side is dropped
      $pairCount = min(count($from), count($to));
      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    }

    return strtr($str, $from);
  }
5889
5890
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // every charset name other than the default one is normalized first
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($str, $charset);
  }
5914
5915
  /**
   * Get part of a string.
   *
   * The work is delegated to the first supported backend: "\mb_substr", "\iconv_substr" (non-negative length
   * only), "\grapheme_substr" and finally a pure PHP implementation based on UTF8::split().
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $start     <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>Returns a sub-string specified by the start and length parameters,<br />
   *                      an empty string for empty input,<br />
   *                      or <strong>false</strong> if start is greater than the length of the string.</p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the length of the string is only measured when it is actually needed
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    // a start position behind the end of the string is reported with false
    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    if (self::$SUPPORT['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($chars, $start, $length));
  }
6004
6005
  /**
6006
   * Binary safe comparison of two strings from an offset, up to length characters.
6007
   *
6008
   * @param string  $main_str           <p>The main string being compared.</p>
6009
   * @param string  $str                <p>The secondary string being compared.</p>
6010
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
6011
   *                                    the end of the string.</p>
6012
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
6013
   *                                    the length of the str compared to the length of main_str less the offset.</p>
6014
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6015
   *                                    insensitive.</p>
6016
   *
6017
   * @return int
6018
   */
6019
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
6020
  {
6021
    $main_str = self::substr($main_str, $offset, $length);
6022
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 6021 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
6023
6024
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 6021 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 6022 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 6021 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 6022 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
6025
  }
6026
6027
  /**
6028
   * Count the number of substring occurrences.
6029
   *
6030
   * @link  http://php.net/manual/en/function.substr-count.php
6031
   *
6032
   * @param string  $haystack  <p>The string to search in.</p>
6033
   * @param string  $needle    <p>The substring to search for.</p>
6034
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
6035
   * @param int     $length    [optional] <p>
6036
   *                           The maximum length after the specified offset to search for the
6037
   *                           substring. It outputs a warning if the offset plus the length is
6038
   *                           greater than the haystack length.
6039
   *                           </p>
6040
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
6041
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
6042
   *
6043
   * @return int|false <p>This function returns an integer or false if there isn't a string.</p>
6044
   */
6045
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Compare $length against null explicitly: a loose truth test would treat
    // an explicit length of 0 like "no length given".
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // Keep null as null: casting it to 0 would cut the haystack down to
      // nothing whenever only an offset is supplied.
      if ($length !== null) {
        $length = (int)$length;
      }

      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // NOTE(review): assumes self::substr() reads a null length as
      // "up to the end of the string" - confirm against its signature.
      $haystack = self::substr($haystack, $offset, $length, $encoding);

      // self::substr() may return false; count inside an empty string then,
      // instead of passing false on to the string functions below.
      if ($haystack === false) {
        $haystack = '';
      }
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // clean both operands so that they are compared on the same footing
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);

      // cleaning may have emptied the needle; an empty needle cannot be counted
      if (!isset($needle[0])) {
        return false;
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the literal matches of the needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6101
6102 6
  /**
6103 6
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
6104
   *
6105
   * @param string $haystack <p>The string to search in.</p>
6106 5
   * @param string $needle   <p>The substring to search for.</p>
6107 5
   *
6108
   * @return string <p>Return the sub-string.</p>
6109 5
   */
6110 1 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $subject = (string)$haystack;
    $prefix = (string)$needle;

    // an empty subject has nothing that could be stripped
    if ($subject === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the subject as it is
    if ($prefix === '' || self::str_istarts_with($subject, $prefix) !== true) {
      return $subject;
    }

    // skip self::strlen($prefix) positions from the start
    return self::substr($subject, self::strlen($prefix));
  }
6130
6131
  /**
6132
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
6133
   *
6134
   * @param string $haystack <p>The string to search in.</p>
6135
   * @param string $needle   <p>The substring to search for.</p>
6136
   *
6137
   * @return string <p>Return the sub-string.</p>
6138
   */
6139 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $subject = (string)$haystack;
    $suffix = (string)$needle;

    // an empty subject has nothing that could be stripped
    if ($subject === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the subject as it is
    if ($suffix === '' || self::str_iends_with($subject, $suffix) !== true) {
      return $subject;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($subject) - self::strlen($suffix);

    return self::substr($subject, 0, $keepLength);
  }
6159
6160 1
  /**
6161
   * Removes a prefix ($needle) from the start of the string ($haystack).
6162 1
   *
6163 1
   * @param string $haystack <p>The string to search in.</p>
6164
   * @param string $needle   <p>The substring to search for.</p>
6165
   *
6166 1
   * @return string <p>Return the sub-string.</p>
6167
   */
6168 1 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    $subject = (string)$haystack;
    $prefix = (string)$needle;

    // an empty subject has nothing that could be stripped
    if ($subject === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the subject as it is
    if ($prefix === '' || self::str_starts_with($subject, $prefix) !== true) {
      return $subject;
    }

    // skip self::strlen($prefix) positions from the start
    return self::substr($subject, self::strlen($prefix));
  }
6188
6189
  /**
6190
   * Replace text within a portion of a string.
6191
   *
6192
   * source: https://gist.github.com/stemar/8287074
6193
   *
6194
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
6195
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
6196
   * @param int|int[]       $start            <p>
6197
   *                                          If start is positive, the replacing will begin at the start'th offset
6198
   *                                          into string.
6199
   *                                          <br /><br />
6200
   *                                          If start is negative, the replacing will begin at the start'th character
6201 10
   *                                          from the end of string.
6202
   *                                          </p>
6203 10
   * @param int|int[]|void  $length           [optional] <p>If given and is positive, it represents the length of the
6204 10
   *                                          portion of string which is to be replaced. If it is negative, it
6205
   *                                          represents the number of characters from the end of string at which to
6206 10
   *                                          stop replacing. If it is not given, then it will default to strlen(
6207 3
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6208
   *                                          length is zero then this function will have the effect of inserting
6209
   *                                          replacement into string at the given start offset.</p>
6210 8
   *
6211 8
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6212 8
   */
6213
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str) === true) {
      $total = count($str);

      // one replacement per subject: trim a given list, repeat a single value
      $replacement = is_array($replacement) === true
          ? array_slice($replacement, 0, $total)
          : array_pad(array($replacement), $total, $replacement);

      // one start offset per subject; entries that are not integers become 0
      if (is_array($start) === true) {
        $start = array_map(
            function ($offset) {
              return (int)$offset === $offset ? $offset : 0;
            },
            array_slice($start, 0, $total)
        );
      } else {
        $start = array_pad(array($start), $total, $start);
      }

      // one length per subject
      if ($length === null) {
        // no length given: every subject gets a length of 0
        $length = array_fill(0, $total, 0);
      } elseif (is_array($length) === true) {
        // null entries become 0, other non-integer entries become the number of subjects
        $length = array_map(
            function ($span) use ($total) {
              if ($span === null) {
                return 0;
              }

              return (int)$span === $span ? $span : $total;
            },
            array_slice($length, 0, $total)
        );
      } else {
        $length = array_pad(array($length), $total, $length);
      }

      // handle every subject on its own via the scalar code path below
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single subject takes a single replacement: use the entry at index 0
    if (is_array($replacement) === true) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into single characters ...
    preg_match_all('/./us', $str, $subjectChars);
    preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    // ... and splice the replacement characters into the subject characters
    array_splice($subjectChars[0], $start, $length, $replacementChars[0]);

    return implode('', $subjectChars[0]);
  }
6286
6287
  /**
6288
   * Removes a suffix ($needle) from the end of the string ($haystack).
6289
   *
6290
   * @param string $haystack <p>The string to search in.</p>
6291
   * @param string $needle   <p>The substring to search for.</p>
6292
   *
6293
   * @return string <p>Return the sub-string.</p>
6294
   */
6295 View Code Duplication
  public static function substr_right($haystack, $needle)
  {
    $subject = (string)$haystack;
    $suffix = (string)$needle;

    // an empty subject has nothing that could be stripped
    if ($subject === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the subject as it is
    if ($suffix === '' || self::str_ends_with($subject, $suffix) !== true) {
      return $subject;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($subject) - self::strlen($suffix);

    return self::substr($subject, 0, $keepLength);
  }
6314
6315
  /**
6316
   * Returns a case swapped version of the string.
6317
   *
6318
   * @param string  $str       <p>The input string.</p>
6319
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6320
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
6321
   *
6322
   * @return string <p>Each character's case swapped.</p>
6323
   */
6324
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the "/u" pattern below cannot process a subject that is not valid UTF-8
      $str = self::clean($str);
    }

    // visit every non-whitespace character and flip its case
    $strSwappedCase = preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          // unchanged by upper-casing: lower-case it instead
          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // preg_replace_callback() returns null on a PCRE error; the cast keeps the
    // documented string return type in that case
    return (string)$strSwappedCase;
  }
6358
6359
  /**
6360
   * alias for "UTF8::to_ascii()"
6361
   *
6362
   * @see UTF8::to_ascii()
6363
   *
6364
   * @param string $s
6365
   * @param string $subst_chr
6366
   * @param bool   $strict
6367
   *
6368
   * @return string
6369
   *
6370
   * @deprecated
6371
   */
6372
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    // deprecated alias: hand all arguments over to to_ascii() unchanged
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6376
6377
  /**
6378
   * alias for "UTF8::to_iso8859()"
6379
   *
6380
   * @see UTF8::to_iso8859()
6381
   *
6382
   * @param string $str
6383
   *
6384
   * @return string|string[]
6385
   *
6386
   * @deprecated
6387
   */
6388
  public static function toIso8859($str)
  {
    // deprecated alias: hand the argument over to to_iso8859() unchanged
    $converted = self::to_iso8859($str);

    return $converted;
  }
6392
6393
  /**
6394
   * alias for "UTF8::to_latin1()"
6395
   *
6396
   * @see UTF8::to_latin1()
6397
   *
6398
   * @param string|string[] $str
6399
   *
6400
   * @return string|string[]
6401
   *
6402
   * @deprecated
6403
   */
6404
  public static function toLatin1($str)
  {
    // deprecated alias: hand the argument over to to_latin1() unchanged
    $converted = self::to_latin1($str);

    return $converted;
  }
6408
6409
  /**
6410
   * alias for "UTF8::to_utf8()"
6411
   *
6412
   * @see UTF8::to_utf8()
6413
   *
6414
   * @param string $str
6415
   *
6416
   * @return string
6417
   *
6418
   * @deprecated
6419
   */
6420
  public static function toUTF8($str)
  {
    // deprecated alias: hand the argument over to to_utf8() unchanged
    $converted = self::to_utf8($str);

    return $converted;
  }
6424
6425
  /**
6426
   * Convert a string into ASCII.
6427
   *
6428
   * @param string $str     <p>The input string.</p>
6429
   * @param string $unknown [optional] <p>Character to use if a character is unknown. (default is ?)</p>
6430
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6431
   *                        performance</p>
6432
   *
6433
   * @return string
6434
   */
6435
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // lookup tables, filled bank by bank on first use and kept between calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // transliterator_transliterate() returns false on failure: keep the
        // untouched string then and let the table lookup below deal with it
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    // split the string into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // Reset for every character, otherwise a lead byte that matches none of
      // the ranges below would silently reuse the previous character's value.
      $ord = null;

      // Rebuild the code point from the lead byte and its continuation bytes.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // lead bytes 128-191, 254 and 255 yield no code point
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the table ("bank"), the low 8 bits the entry in it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));

        // remember missing data as an empty table so it is not requested again
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no table entry for this code point
        $c = $unknown;
      }
    }
    // break the reference so that later writes to $c cannot alter $chars
    unset($c);

    return implode('', $chars);
  }
6580
6581
  /**
6582
   * Convert a string into "ISO-8859"-encoding (Latin-1).
6583
   *
6584
   * @param string|string[] $str
6585
   *
6586
   * @return string|string[]
6587
   */
6588
  public static function to_iso8859($str)
  {
    // arrays are converted element by element; array_map() keeps the keys
    // because only a single array is passed
    if (is_array($str) === true) {
      return array_map(array(__CLASS__, 'to_iso8859'), $str);
    }

    $input = (string)$str;

    // nothing to decode in an empty string
    if ($input === '') {
      return '';
    }

    return self::utf8_decode($input);
  }
6610
6611
  /**
6612
   * alias for "UTF8::to_iso8859()"
6613
   *
6614
   * @see UTF8::to_iso8859()
6615
   *
6616
   * @param string|string[] $str
6617
   *
6618
   * @return string|string[]
6619
   */
6620
  public static function to_latin1($str)
  {
    // alias: hand the argument over to to_iso8859() unchanged
    $converted = self::to_iso8859($str);

    return $converted;
  }
6624
6625
  /**
6626
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
6627
   *
6628
   * <ul>
6629
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
6630
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
6631
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
6632
   * case.</li>
6633
   * </ul>
6634
   *
6635
   * @param string|string[] $str                    <p>Any string or array.</p>
6636
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
6637
   *
6638
   * @return string|string[] <p>The UTF-8 encoded string.</p>
6639
   */
6640
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
6641
  {
6642
    if (is_array($str) === true) {
6643
      /** @noinspection ForeachSourceInspection */
6644
      foreach ($str as $k => $v) {
6645
        /** @noinspection AlterInForeachInspection */
6646
        /** @noinspection OffsetOperationsInspection */
6647
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
6648
      }
6649
6650
      return $str;
6651
    }
6652
6653
    $str = (string)$str;
6654
6655
    if (!isset($str[0])) {
6656
      return $str;
6657
    }
6658
6659
    $max = strlen($str);
6660
    $buf = '';
6661
6662
    /** @noinspection ForeachInvariantsInspection */
6663
    for ($i = 0; $i < $max; $i++) {
6664
6665
      $c1 = $str[$i];
6666
6667
      if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
6668
6669
        if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
6670
6671
          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
6672
6673
          if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
6674
            $buf .= $c1 . $c2;
6675
            $i++;
6676 View Code Duplication
          } else { // not valid UTF8 - convert it
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6677
            $cc1tmp = ord($c1) / 64;
6678
            $cc1 = UTF8NonStrict::chr($cc1tmp) | "\xC0";
6679
            $cc2 = ($c1 & "\x3F") | "\x80";
6680
            $buf .= $cc1 . $cc2;
6681
          }
6682
6683
        } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
6684
6685
          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
6686
          $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
6687
6688
          if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
6689
            $buf .= $c1 . $c2 . $c3;
6690
            $i += 2;
6691 View Code Duplication
          } else { // not valid UTF8 - convert it
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6692
            $cc1tmp = ord($c1) / 64;
6693
            $cc1 = UTF8NonStrict::chr($cc1tmp) | "\xC0";
6694
            $cc2 = ($c1 & "\x3F") | "\x80";
6695
            $buf .= $cc1 . $cc2;
6696
          }
6697
6698
        } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
6699
6700
          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
6701
          $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
6702
          $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
6703
6704
          if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
6705
            $buf .= $c1 . $c2 . $c3 . $c4;
6706
            $i += 3;
6707 View Code Duplication
          } else { // not valid UTF8 - convert it
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6708
            $cc1tmp = ord($c1) / 64;
6709
            $cc1 = UTF8NonStrict::chr($cc1tmp) | "\xC0";
6710
            $cc2 = ($c1 & "\x3F") | "\x80";
6711
            $buf .= $cc1 . $cc2;
6712
          }
6713
6714 View Code Duplication
        } else { // doesn't look like UTF8, but should be converted
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6715
          $cc1tmp = ord($c1) / 64;
6716
          $cc1 = UTF8NonStrict::chr($cc1tmp) | "\xC0";
6717
          $cc2 = ($c1 & "\x3F") | "\x80";
6718
          $buf .= $cc1 . $cc2;
6719
        }
6720
6721
      } elseif (($c1 & "\xC0") === "\x80") { // needs conversion
6722
6723
        $ordC1 = ord($c1);
6724
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
6725
          $buf .= self::$WIN1252_TO_UTF8[$ordC1];
6726 View Code Duplication
        } else {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6727
          $cc1 = UTF8NonStrict::chr($ordC1 / 64) | "\xC0";
6728
          $cc2 = ($c1 & "\x3F") | "\x80";
6729
          $buf .= $cc1 . $cc2;
6730
        }
6731
6732
      } else { // it doesn't need conversion
6733
        $buf .= $c1;
6734
      }
6735
    }
6736
6737
    // decode unicode escape sequences
6738
    $buf = preg_replace_callback(
6739
        '/\\\\u([0-9a-f]{4})/i',
6740
        function ($match) {
6741
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
6742
        },
6743
        $buf
6744
    );
6745
6746
    // decode UTF-8 codepoints
6747
    if ($decodeHtmlEntityToUtf8 === true) {
6748
      $buf = self::html_entity_decode($buf);
6749
    }
6750
6751
    return $buf;
6752
  }
6753
6754
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but checking
   * for that costs more time than the native call would save.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>
   *                      Characters to be stripped. When omitted or empty, all leading and trailing
   *                      Unicode separator (\pZ) and "other" (\pC) characters are stripped.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP but it is a legitimate character list, so it must
    // not be mistaken for "no character list given".
    $useDefaultCharacters = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultCharacters) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    // custom character list: delegate to the one-sided helpers
    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6782
6783
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Static analysis reports that self::substr() can also yield false; both
    // pieces are normalized to strings so strtoupper() and the concatenation
    // always operate on string values.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $remainder = (string)self::substr($str, 1, null, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
6796
6797
  /**
   * Alias for "UTF8::ucfirst()": upper-cases the first character of a single word.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The word to capitalize.</p>
   * @param string  $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6812
6813
  /**
   * Uppercase for all words in the string.
   *
   * The string is split via "UTF8::str_to_words()", every piece that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()", and the pieces are joined again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // test for emptiness by length: the string "0" is falsy but not empty
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip only truly empty pieces; a "0" word must survive
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6862
6863
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal HTML entities, which are decoded in the loop below
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    $flags = Bootup::is_php('5.4') === true ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    // decode repeatedly until the string no longer changes (or only once if $multi_decode is off)
    do {
      $str_compare = $str;

      $str = self::fix_simple_utf8(
          urldecode(
              self::html_entity_decode(
                  // to_utf8() is reported to possibly return a non-string; html_entity_decode() needs a string
                  (string)self::to_utf8($str),
                  $flags
              )
          )
      );

    } while ($multi_decode === true && $str_compare !== $str);

    return (string)$str;
  }
6913
6914
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8.
   *
   * Keys are the percent-encoded bytes "%20" .. "%FF", values are the characters they map to.
   *
   * NOTE(review): the entries for %7F, %81, %8D, %8F, %90 and %9D are empty strings;
   * confirm that no literal control characters were intended there.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (same mapping as self::$WIN1252_TO_UTF8[128]),
        // not a second backtick.
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        // NO-BREAK SPACE, written as an escape so the invisible character cannot get lost
        '%A0' => "\xC2\xA0",
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        // SOFT HYPHEN, written as an escape so the invisible character cannot get lost
        '%AD' => "\xC2\xAD",
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7150
7151
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalized with "UTF8::to_utf8()", then the sequences listed in
   * self::$UTF8_TO_WIN1252 are substituted before the result is handed to the
   * polyfill decoder.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search/replace lists derived from the mapping table, built once per request
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $input = (string)self::to_utf8($input);

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    $substituted = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $input);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($substituted);
  }
7180
7181
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, sequences listed in self::$CP1252_TO_UTF8 are
   * substituted, but only if the converted string contains a "\xC2" byte at all.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search/replace lists derived from the mapping table, built once per request
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $encoded = \utf8_encode($input);

    // without a "\xC2" byte there is nothing for the substitution table to match
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7214
7215
  /**
   * Fix utf8-win1252 chars; thin wrapper around "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7228
7229
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7245
7246
  /**
   * Limit the number of words in a string.
   *
   * If the string holds more than $words words, it is cut after the last allowed word,
   * right-trimmed, and $strAddOn is appended; otherwise it is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, then up to $limit "word + trailing whitespace" groups
    $regex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($regex, $text, $found);

    $wasShortened = isset($found[0]) && self::strlen($text) !== self::strlen($found[0]);

    if (!$wasShortened) {
      return $text;
    }

    return self::rtrim($found[0]) . $strAddOn;
  }
7281
7282
  /**
   * Wraps a string to a given number of characters
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // Build a one-byte-per-character "shadow" of the input: ' ' for a space,
    // '?' for any other character and '#' for an existing break. $glyphs keeps
    // the real characters (and break strings) in the same order.
    $shadow = '';
    $glyphs = array();
    $segments = explode($break, $str);
    $segmentCount = count($segments);

    /** @noinspection ForeachInvariantsInspection */
    for ($segmentIndex = 0; $segmentIndex < $segmentCount; ++$segmentIndex) {

      if ($segmentIndex) {
        $glyphs[] = $break;
        $shadow .= '#';
      }

      $segment = $segments[$segmentIndex];
      unset($segments[$segmentIndex]);

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $shadow .= ($glyph === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks go, using the shadow string
    $shadow = wordwrap($shadow, $width, '#', $cut);

    $wrapped = '';
    $glyphPos = 0;
    $breakPos = $shadowPos = -1;

    // replay every '#' position of the wrapped shadow onto the real characters
    while (false !== $breakPos = self::strpos($shadow, '#', $breakPos + 1)) {
      for (++$shadowPos; $shadowPos < $breakPos; ++$shadowPos) {
        $wrapped .= $glyphs[$glyphPos];
        unset($glyphs[$glyphPos++]);
      }

      // drop the original break or space sitting at the wrap position
      if ($break === $glyphs[$glyphPos] || ' ' === $glyphs[$glyphPos]) {
        unset($glyphs[$glyphPos++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $glyphs);
  }
7349
7350
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7359
7360
}
7361