Completed
Push — master ( e075f4...0c7b1d )
by Lars
02:52
created

UTF8::substr_iright()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 24
Code Lines 13

Duplication

Lines 24
Ratio 100 %

Code Coverage

Tests 0
CRAP Score 30

Importance

Changes 0
Metric Value
dl 24
loc 24
ccs 0
cts 0
cp 0
rs 8.5125
c 0
b 0
f 0
cc 5
eloc 13
nc 5
nop 2
crap 30
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  /**
15
   * @var array
16
   */
17
  private static $WIN1252_TO_UTF8 = array(
18
      128 => "\xe2\x82\xac", // EURO SIGN
19
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
20
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
21
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
22
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
23
      134 => "\xe2\x80\xa0", // DAGGER
24
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
25
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
26
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
27
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
28
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
29
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
30
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
31
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
32
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
33
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
34
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
35
      149 => "\xe2\x80\xa2", // BULLET
36
      150 => "\xe2\x80\x93", // EN DASH
37
      151 => "\xe2\x80\x94", // EM DASH
38
      152 => "\xcb\x9c", // SMALL TILDE
39
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
40
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
41
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
42
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
43
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
44
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
45
  );
46
47
  /**
48
   * @var array
49
   */
50
  private static $CP1252_TO_UTF8 = array(
51
      '€' => '€',
52
      '‚' => '‚',
53
      'ƒ' => 'ƒ',
54
      '„' => '„',
55
      '…' => '…',
56
      '†' => '†',
57
      '‡' => '‡',
58
      'ˆ' => 'ˆ',
59
      '‰' => '‰',
60
      'Š' => 'Š',
61
      '‹' => '‹',
62
      'Œ' => 'Œ',
63
      'Ž' => 'Ž',
64
      '‘' => '‘',
65
      '’' => '’',
66
      '“' => '“',
67
      '”' => '”',
68
      '•' => '•',
69
      '–' => '–',
70
      '—' => '—',
71
      '˜' => '˜',
72
      '™' => '™',
73
      'š' => 'š',
74
      '›' => '›',
75
      'œ' => 'œ',
76
      'ž' => 'ž',
77
      'Ÿ' => 'Ÿ',
78
  );
79
80
  /**
81
   * Bom => Byte-Length
82
   *
83
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
84
   *
85
   * @var array
86
   */
87
  private static $BOM = array(
88
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
89
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
90
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
91
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
94
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
95
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
96
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
97
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
98
  );
99
100
  /**
101
   * Numeric code point => UTF-8 Character
102
   *
103
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
104
   *
105
   * @var array
106
   */
107
  private static $WHITESPACE = array(
108
    // NUL Byte
109
    0     => "\x0",
110
    // Tab
111
    9     => "\x9",
112
    // New Line
113
    10    => "\xa",
114
    // Vertical Tab
115
    11    => "\xb",
116
    // Carriage Return
117
    13    => "\xd",
118
    // Ordinary Space
119
    32    => "\x20",
120
    // NO-BREAK SPACE
121
    160   => "\xc2\xa0",
122
    // OGHAM SPACE MARK
123
    5760  => "\xe1\x9a\x80",
124
    // MONGOLIAN VOWEL SEPARATOR
125
    6158  => "\xe1\xa0\x8e",
126
    // EN QUAD
127
    8192  => "\xe2\x80\x80",
128
    // EM QUAD
129
    8193  => "\xe2\x80\x81",
130
    // EN SPACE
131
    8194  => "\xe2\x80\x82",
132
    // EM SPACE
133
    8195  => "\xe2\x80\x83",
134
    // THREE-PER-EM SPACE
135
    8196  => "\xe2\x80\x84",
136
    // FOUR-PER-EM SPACE
137
    8197  => "\xe2\x80\x85",
138
    // SIX-PER-EM SPACE
139
    8198  => "\xe2\x80\x86",
140
    // FIGURE SPACE
141
    8199  => "\xe2\x80\x87",
142
    // PUNCTUATION SPACE
143
    8200  => "\xe2\x80\x88",
144
    // THIN SPACE
145
    8201  => "\xe2\x80\x89",
146
    // HAIR SPACE
147
    8202  => "\xe2\x80\x8a",
148
    // LINE SEPARATOR
149
    8232  => "\xe2\x80\xa8",
150
    // PARAGRAPH SEPARATOR
151
    8233  => "\xe2\x80\xa9",
152
    // NARROW NO-BREAK SPACE
153
    8239  => "\xe2\x80\xaf",
154
    // MEDIUM MATHEMATICAL SPACE
155
    8287  => "\xe2\x81\x9f",
156
    // IDEOGRAPHIC SPACE
157
    12288 => "\xe3\x80\x80",
158
  );
159
160
  /**
161
   * @var array
162
   */
163
  private static $WHITESPACE_TABLE = array(
164
      'SPACE'                     => "\x20",
165
      'NO-BREAK SPACE'            => "\xc2\xa0",
166
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
167
      'EN QUAD'                   => "\xe2\x80\x80",
168
      'EM QUAD'                   => "\xe2\x80\x81",
169
      'EN SPACE'                  => "\xe2\x80\x82",
170
      'EM SPACE'                  => "\xe2\x80\x83",
171
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
172
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
173
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
174
      'FIGURE SPACE'              => "\xe2\x80\x87",
175
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
176
      'THIN SPACE'                => "\xe2\x80\x89",
177
      'HAIR SPACE'                => "\xe2\x80\x8a",
178
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
179
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
180
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
181
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
182
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
183
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
184
  );
185
186
  /**
187
   * bidirectional text chars
188
   *
189
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
190
   *
191
   * @var array
192
   */
193
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
194
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
195
    8234 => "\xE2\x80\xAA",
196
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
197
    8235 => "\xE2\x80\xAB",
198
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
199
    8236 => "\xE2\x80\xAC",
200
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
201
    8237 => "\xE2\x80\xAD",
202
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
203
    8238 => "\xE2\x80\xAE",
204
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
205
    8294 => "\xE2\x81\xA6",
206
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
207
    8295 => "\xE2\x81\xA7",
208
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
209
    8296 => "\xE2\x81\xA8",
210
    // POP DIRECTIONAL ISOLATE
211
    8297 => "\xE2\x81\xA9",
212
  );
213
214
  /**
215
   * @var array
216
   */
217
  private static $COMMON_CASE_FOLD = array(
218
      'ſ'            => 's',
219
      "\xCD\x85"     => 'ι',
220
      'ς'            => 'σ',
221
      "\xCF\x90"     => 'β',
222
      "\xCF\x91"     => 'θ',
223
      "\xCF\x95"     => 'φ',
224
      "\xCF\x96"     => 'π',
225
      "\xCF\xB0"     => 'κ',
226
      "\xCF\xB1"     => 'ρ',
227
      "\xCF\xB5"     => 'ε',
228
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
229
      "\xE1\xBE\xBE" => 'ι',
230
  );
231
232
  /**
233
   * @var array
234
   */
235
  private static $BROKEN_UTF8_FIX = array(
236
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
237
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
238
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
239
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
240
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
241
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
242
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
243
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
244
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
245
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
246
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
247
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
248
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
249
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
250
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
251
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
252
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
253
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
254
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
255
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
256
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
257
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
258
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
259
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
260
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
261
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
262
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
263
      'ü'       => 'ü',
264
      'ä'       => 'ä',
265
      'ö'       => 'ö',
266
      'Ö'       => 'Ö',
267
      'ß'       => 'ß',
268
      'Ã '       => 'à',
269
      'á'       => 'á',
270
      'â'       => 'â',
271
      'ã'       => 'ã',
272
      'ù'       => 'ù',
273
      'ú'       => 'ú',
274
      'û'       => 'û',
275
      'Ù'       => 'Ù',
276
      'Ú'       => 'Ú',
277
      'Û'       => 'Û',
278
      'Ü'       => 'Ü',
279
      'ò'       => 'ò',
280
      'ó'       => 'ó',
281
      'ô'       => 'ô',
282
      'è'       => 'è',
283
      'é'       => 'é',
284
      'ê'       => 'ê',
285
      'ë'       => 'ë',
286
      'À'       => 'À',
287
      'Á'       => 'Á',
288
      'Â'       => 'Â',
289
      'Ã'       => 'Ã',
290
      'Ä'       => 'Ä',
291
      'Ã…'       => 'Å',
292
      'Ç'       => 'Ç',
293
      'È'       => 'È',
294
      'É'       => 'É',
295
      'Ê'       => 'Ê',
296
      'Ë'       => 'Ë',
297
      'ÃŒ'       => 'Ì',
298
      'Í'       => 'Í',
299
      'ÃŽ'       => 'Î',
300
      'Ï'       => 'Ï',
301
      'Ñ'       => 'Ñ',
302
      'Ã’'       => 'Ò',
303
      'Ó'       => 'Ó',
304
      'Ô'       => 'Ô',
305
      'Õ'       => 'Õ',
306
      'Ø'       => 'Ø',
307
      'Ã¥'       => 'å',
308
      'æ'       => 'æ',
309
      'ç'       => 'ç',
310
      'ì'       => 'ì',
311
      'í'       => 'í',
312
      'î'       => 'î',
313
      'ï'       => 'ï',
314
      'ð'       => 'ð',
315
      'ñ'       => 'ñ',
316
      'õ'       => 'õ',
317
      'ø'       => 'ø',
318
      'ý'       => 'ý',
319
      'ÿ'       => 'ÿ',
320
      '€'      => '€',
321
      '’'      => '’',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $UTF8_TO_WIN1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $UTF8_MSWORD = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $ICONV_ENCODING = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $SUPPORT = array();
808
809 1
  /**
   * Constructor.
   *
   * Runs the environment detection (UTF8::checkForSupport()) whenever an
   * instance of this class is created.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
816
817
  /**
   * Fetch the character at a given position, similar to $str[1] but delegating
   * the actual extraction to UTF8::substr().
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string if the
   *                input is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $string = (string)$str;
    $position = (int)$pos;

    // Nothing can be extracted from an empty string or at a negative position.
    if ($position < 0 || $string === '') {
      return '';
    }

    return (string)self::substr($string, $position, 1);
  }
840 1
841
  /**
   * Prepend the UTF-8 BOM to a string and return the result.
   *
   * INFO: If UTF8::string_has_bom() does not report false for the input,
   *       the input string is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // A BOM was already detected, so there is nothing to add.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
858
859
  /**
   * Convert a binary representation (a sequence of "1" and "0") into a string.
   *
   * The input is converted from base 2 to base 16 and the resulting hex digits
   * are packed into bytes.
   *
   * NOTE(review): the PHP manual warns that base_convert() may lose precision
   *               on large numbers, so long inputs should be verified.
   *
   * @param mixed $bin 1|0
   *
   * @return string <p>The packed string, or an empty string if $bin has no
   *                element at offset 0.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    $hexDigits = base_convert($bin, 2, 16);

    return pack('H*', $hexDigits);
  }
874 1
875
  /**
   * Get the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
886 2
887
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string to run the callback on.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
901
902
  /**
   * Detect which UTF-8 related features are available on this server and
   * store the results in self::$SUPPORT.
   *
   * The detection is skipped when the "already_checked_via_portable_utf8"
   * flag is already set, so repeated calls are cheap.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // The results are kept in self::$SUPPORT, so one run is enough.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // True only if the constant exists and the string-overload bit is set
    // in the "mbstring.func_overload" ini setting.
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    $intlAvailable = self::intl_loaded();
    self::$SUPPORT['intl'] = $intlAvailable;

    // The transliterator ids are only queried when "intl" is available and
    // provides the function; otherwise the list stays empty.
    self::$SUPPORT['intl__transliterator_list_ids'] = (
        $intlAvailable === true
        &&
        function_exists('transliterator_list_ids') === true
    ) ? transliterator_list_ids() : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
947 1
948 1
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default "UTF-8" encoding the call is delegated to \IntlChar::chr()
   * when self::$SUPPORT reports "intlChar" support. Otherwise the bytes are
   * built manually and, for any other encoding, converted from UTF-8 via
   * mb_convert_encoding(). Manually built results are kept in a static cache.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input
   *                     (e.g. a non-integer or a negative code point in the manual path).</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } elseif (self::$SUPPORT['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    $i = (int)$code_point;
    if ($i !== $code_point) {
      return null;
    }

    // A negative code point cannot be encoded: without this guard it would be
    // passed to chr() and produce a stray, invalid byte instead of a failure.
    if ($code_point < 0) {
      return null;
    }

    // use static cache, only if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    // The separator keeps the key unambiguous, since a code point followed by
    // an encoding that starts with a digit could otherwise collide.
    $cacheKey = $code_point . '_' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // Wrap the value into the 21-bit range handled by the branches below.
    $code_point %= 0x200000;

    if ($code_point < 0x80) {
      // 1 byte
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point < 0x800) {
      // 2 bytes
      $str = self::chr_and_parse_int(0xC0 | ($code_point >> 6)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    } elseif ($code_point < 0x10000) {
      // 3 bytes
      $str = self::chr_and_parse_int(0xE0 | ($code_point >> 12)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 6) & 0x3F)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    } else {
      // 4 bytes
      $str = self::chr_and_parse_int(0xF0 | ($code_point >> 18)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 12) & 0x3F)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 6) & 0x3F)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1008
1009 2
  /**
1010
   * @param int $int
1011 2
   *
1012 2
   * @return string
1013 2
   */
1014
  private static function chr_and_parse_int($int)
  {
    // cast first, so that the native (single-byte) chr() always receives an integer
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1018
1019
  /**
1020
   * Applies callback to all characters of a string.
1021
   *
1022
   * @param string|array $callback <p>The callback function.</p>
1023 2
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
1024
   *
1025 2
   * @return array <p>The outcome of callback.</p>
1026 2
   */
1027
  public static function chr_map($callback, $str)
  {
    // the string is split via UTF8::split() and every resulting element is handed to the callback
    return array_map($callback, self::split($str));
  }
1033
1034
  /**
1035
   * Generates an array of byte length of each character of a Unicode string.
1036
   *
1037
   * 1 byte => U+0000  - U+007F
1038
   * 2 byte => U+0080  - U+07FF
1039 1
   * 3 byte => U+0800  - U+FFFF
1040
   * 4 byte => U+10000 - U+10FFFF
1041 1
   *
1042
   * @param string $str <p>The original Unicode string.</p>
1043
   *
1044
   * @return array <p>An array of byte lengths of each character.</p>
1045
   */
1046
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // nothing to measure for an empty string
    if ($str === '') {
      return array();
    }

    // the '8BIT' encoding makes UTF8::strlen() count bytes instead of characters
    $byteLength = function ($character) {
      return UTF8::strlen($character, '8BIT');
    };

    $characters = self::split($str);

    return array_map($byteLength, $characters);
  }
1061
1062
  /**
1063
   * Get a decimal code representation of a specific character.
1064
   *
1065
   * @param string $char <p>The input character.</p>
1066
   *
1067
   * @return int
1068
   */
1069
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // Guard: an empty input has no first byte, reading $char[0] would raise
    // an "Uninitialized string offset" notice / warning.
    // NOTE(review): 0 is presumably what the unguarded code ended up with
    // (via UTF8::ord('')) - confirm against UTF8::ord().
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // the lead byte announces the length of the sequence, its marker bits are stripped
    $bytes = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx
      //
      // A truncated sequence has no byte at this offset: the missing byte is
      // counted as 0 instead of reading an uninitialized string offset.
      $continuation = isset($char[$i]) ? self::ord($char[$i]) : 0;

      $code = ($code << 6) + ($continuation & ~0x80);
    }

    return $code;
  }
1101 5
1102 5
  /**
1103
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1104 44
   *
1105
   * @param string $char <p>The input character</p>
1106
   * @param string $pfix [optional]
1107
   *
1108
   * @return string <p>The code point encoded as U+xxxx</p>
1109
   */
1110
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    // an empty input results in an empty output (without the prefix)
    if ($char === '') {
      return '';
    }

    // the HTML entity of the NUL-character is passed on as an empty character
    $character = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($character);

    return self::int_to_hex($codePoint, $pfix);
  }
1124
1125
  /**
1126
   * alias for "UTF8::chr_to_decimal()"
1127
   *
1128
   * @see UTF8::chr_to_decimal()
1129
   *
1130 4
   * @param string $chr
1131
   *
1132 4
   * @return int
1133
   */
1134
  public static function chr_to_int($chr)
  {
    // pure alias, the work is done by UTF8::chr_to_decimal()
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1138
1139
  /**
1140
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1141
   *
1142
   * @param string $body     <p>The original string to be split.</p>
1143
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1144
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1145
   *
1146 5
   * @return string <p>The chunked string</p>
1147
   */
1148 5
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    // the separator is only placed between the chunks, nothing is appended after the last one
    return implode($end, $chunks);
  }
1152 5
1153
  /**
1154 5
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1155 5
   *
1156 5
   * @param string $str                     <p>The string to be sanitized.</p>
1157
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1158 5
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1159
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1160 5
   *                                        => "..."</p>
1161 1
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1162
   *                                        $normalize_whitespace</p>
1163 1
   *
1164 1
   * @return string <p>Clean UTF-8 encoded string.</p>
1165 1
   */
1166
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 matches runs of well-formed sequences, group 2 + 3 match single stray bytes
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // every match is replaced by group 1 only, so the stray bytes are dropped
    $str = preg_replace($utf8Pattern, '$1', $str);

    // these two steps always run ...
    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    // ... the following steps are opt-in (the flags must be exactly "true")
    if (true === $normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if (true === $remove_bom) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1201
1202
  /**
1203
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
1204
   *
1205
   * @param string $str <p>The input string.</p>
1206
   *
1207
   * @return string
1208
   */
1209
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // step 1: fix ISO <-> UTF-8 errors
    $fixed = self::fix_simple_utf8($str);

    // step 2: remove all non UTF-8 symbols
    //         && remove diamond question mark (�)
    //         && remove invisible characters (e.g. "\0")
    //         && remove BOM
    //         && normalize whitespace chars (but keep non-breaking-spaces)
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
1229 11
1230
  /**
1231
   * Accepts a string or a array of strings and returns an array of Unicode code points.
1232
   *
1233 11
   * INFO: opposite to UTF8::string()
1234
   *
1235
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1236 11
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1237
   *                                    default, code points will be returned as integers.</p>
1238 1
   *
1239 11
   * @return array <p>The array of code points.</p>
1240
   */
1241
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split first, anything else is mapped as it is
    if (is_string($arg) === true) {
      $arg = self::split($arg);
    }

    // element => integer code point
    $toCodePoint = array(
        '\\voku\\helper\\UTF8',
        'ord',
    );
    $arg = array_map($toCodePoint, $arg);

    if (!$u_style) {
      return $arg;
    }

    // integer code point => hexadecimal notation (UTF8::int_to_hex() with its default prefix)
    $toHex = array(
        '\\voku\\helper\\UTF8',
        'int_to_hex',
    );

    return array_map($toHex, $arg);
  }
1267 2
1268 2
  /**
1269
   * Returns count of characters used in a string.
1270 2
   *
1271
   * @param string $str       <p>The input string.</p>
1272 2
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
1273 2
   *
1274
   * @return array <p>An associative array of Character as keys and
1275
   *               their count as values.</p>
1276
   */
1277 1
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split with a chunk length of 1, then count how often every chunk occurs
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1281
1282
  /**
1283
   * Converts a int-value into an UTF-8 character.
1284
   *
1285
   * @param mixed $int
1286
   *
1287
   * @return string
1288
   */
1289
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only referenced if the version check for "5.4" succeeds
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    // the value is wrapped into a numeric HTML entity, which is then decoded
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1299
1300
  /**
1301
   * Encode a string with a new charset-encoding.
1302
   *
1303
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1304
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
1305
   *
1306
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1307
   * @param string $str      <p>The input string</p>
1308
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1309
   *                         /> otherwise we auto-detect the current string-encoding</p>
1310
   *
1311
   * @return string
1312
   */
1313
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do without an input string or without a target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // the feature-detection must have run before self::$SUPPORT is read
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // the string is returned untouched, if its encoding could not be detected ...
    if ($detected === false) {
      return $str;
    }

    // ... or if it already has the target encoding and the conversion is not forced
    if ($force !== true && $detected === $encoding) {
      return $str;
    }

    // target "UTF-8": handled by UTF8::to_utf8()
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            ||
            in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target "ISO-8859-1": handled by UTF8::to_iso8859()
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            ||
            in_array($detected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // everything else is handed to mb_convert_encoding(), a warning is raised
    // first when the support-check reported "mbstring" as missing
    $needsMbstring = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // fall back to the input, if the conversion result is falsy
    return $converted ? $converted : $str;
  }
1390 1
1391 1
  /**
1392 1
   * Reads entire file into a string.
1393 1
   *
1394
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1395 1
   *
1396
   * @link http://php.net/manual/en/function.file-get-contents.php
1397
   *
1398
   * @param string        $filename      <p>
1399
   *                                     Name of the file to read.
1400
   *                                     </p>
1401
   * @param int|false     $flags         [optional] <p>
1402
   *                                     Prior to PHP 6, this parameter is called
1403
   *                                     use_include_path and is a bool.
1404
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1405 1
   *                                     to trigger include path
1406
   *                                     search.
1407 1
   *                                     </p>
1408
   *                                     <p>
1409
   *                                     The value of flags can be any combination of
1410
   *                                     the following flags (with some restrictions), joined with the
1411
   *                                     binary OR (|)
1412
   *                                     operator.
1413
   *                                     </p>
1414
   *                                     <p>
1415
   *                                     <table>
1416
   *                                     Available flags
1417
   *                                     <tr valign="top">
1418
   *                                     <td>Flag</td>
1419 9
   *                                     <td>Description</td>
1420
   *                                     </tr>
1421 9
   *                                     <tr valign="top">
1422 9
   *                                     <td>
1423 3
   *                                     FILE_USE_INCLUDE_PATH
1424
   *                                     </td>
1425 3
   *                                     <td>
1426 3
   *                                     Search for filename in the include directory.
1427 3
   *                                     See include_path for more
1428 9
   *                                     information.
1429 2
   *                                     </td>
1430 2
   *                                     </tr>
1431 2
   *                                     <tr valign="top">
1432 2
   *                                     <td>
1433 9
   *                                     FILE_TEXT
1434
   *                                     </td>
1435 8
   *                                     <td>
1436
   *                                     As of PHP 6, the default encoding of the read
1437 2
   *                                     data is UTF-8. You can specify a different encoding by creating a
1438 2
   *                                     custom context or by changing the default using
1439
   *                                     stream_default_encoding. This flag cannot be
1440 8
   *                                     used with FILE_BINARY.
1441
   *                                     </td>
1442 8
   *                                     </tr>
1443 6
   *                                     <tr valign="top">
1444 6
   *                                     <td>
1445 6
   *                                     FILE_BINARY
1446
   *                                     </td>
1447 6
   *                                     <td>
1448 3
   *                                     With this flag, the file is read in binary mode. This is the default
1449 3
   *                                     setting and cannot be used with FILE_TEXT.
1450 5
   *                                     </td>
1451
   *                                     </tr>
1452
   *                                     </table>
1453
   *                                     </p>
1454
   * @param resource|null $context       [optional] <p>
1455 8
   *                                     A valid context resource created with
1456 8
   *                                     stream_context_create. If you don't need to use a
1457 5
   *                                     custom context, you can skip this parameter by &null;.
1458 8
   *                                     </p>
1459
   * @param int|null      $offset        [optional] <p>
1460
   *                                     The offset where the reading starts.
1461 2
   *                                     </p>
1462 2
   * @param int|null      $maxlen        [optional] <p>
1463 8
   *                                     Maximum length of data read. The default is to read until end
1464 8
   *                                     of file is reached.
1465 9
   *                                     </p>
1466
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1467 9
   *
1468
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1469
   *                                     or pdf, because they used non default utf-8 chars</p>
1470
   *
1471
   * @return string <p>The function returns the read data or false on failure.</p>
1472
   */
1473
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING also rewrites characters like quotes and
    // "<" in the name and is deprecated in newer PHP versions - kept, because
    // callers may rely on the sanitizing.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() returns false if the filter fails; neither that nor an empty
    // name can be read, so the documented failure value is returned directly
    // instead of passing an unusable path to the native function.
    if ($filename === false || $filename === '') {
      return false;
    }

    // apply the timeout via a http-stream-context, but only if the caller
    // did not pass an own context
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // normalize the optional arguments for the native function
    if (!$flags) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    // the length-argument is only passed on, if it is a real integer
    if (is_int($maxlen) === true) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    // optional: convert to UTF-8 (without forcing it) and clean-up the result
    if ($convertToUtf8 === true) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1516
1517
  /**
1518
   * Checks if a file starts with BOM (Byte Order Mark) character.
1519
   *
1520 1
   * @param string $file_path <p>Path to a valid file.</p>
1521
   *
1522 1
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1523 1
   */
1524 1
  public static function file_has_bom($file_path)
  {
    // unqualified function call (not self::file_get_contents()), the content is passed on as it was read
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1528 1
1529
  /**
1530
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1531
   *
1532
   * @param mixed  $var
1533
   * @param int    $normalization_form
1534
   * @param string $leading_combining
1535
   *
1536
   * @return mixed
1537
   */
1538
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    // arrays: filter every element recursively
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // objects: filter every property reachable via foreach recursively
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // every other non-string type is returned untouched
    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, so that the isset()-check below passes
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // no usable result from the normalizer: re-encode the string as UTF-8 instead
        $var = self::encode('UTF-8', $var);
      }
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1592
1593 7
  /**
1594
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1595
   *
1596
   * Gets a specific external variable by name and optionally filters it
1597
   *
1598
   * @link  http://php.net/manual/en/function.filter-input.php
1599
   *
1600
   * @param int    $type          <p>
1601
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1602
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1603 1
   *                              <b>INPUT_ENV</b>.
1604
   *                              </p>
1605 1
   * @param string $variable_name <p>
1606
   *                              Name of a variable to get.
1607 1
   *                              </p>
1608
   * @param int    $filter        [optional] <p>
1609
   *                              The ID of the filter to apply. The
1610 1
   *                              manual page lists the available filters.
1611 1
   *                              </p>
1612
   * @param mixed  $options       [optional] <p>
1613 1
   *                              Associative array of options or bitwise disjunction of flags. If filter
1614
   *                              accepts options, flags can be provided in "flags" field of array.
1615
   *                              </p>
1616 1
   *
1617 1
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1618 1
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1619 1
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1620 1
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1621
   * @since 5.2.0
1622 1
   */
1623 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // the $options are only passed on, if the caller really provided them
    if (func_num_args() >= 4) {
      $value = filter_input($type, $variable_name, $filter, $options);
    } else {
      $value = filter_input($type, $variable_name, $filter);
    }

    return self::filter($value);
  }
1633
1634 1
  /**
1635
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1636
   *
1637
   * Gets external variables and optionally filters them
1638 1
   *
1639
   * @link  http://php.net/manual/en/function.filter-input-array.php
1640
   *
1641
   * @param int   $type       <p>
1642
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1643
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1644
   *                          <b>INPUT_ENV</b>.
1645
   *                          </p>
1646
   * @param mixed $definition [optional] <p>
1647
   *                          An array defining the arguments. A valid key is a string
1648
   *                          containing a variable name and a valid value is either a filter type, or an array
1649
   *                          optionally specifying the filter, flags and options. If the value is an
1650
   *                          array, valid keys are filter which specifies the
1651
   *                          filter type,
1652
   *                          flags which specifies any flags that apply to the
1653
   *                          filter, and options which specifies any options that
1654 1
   *                          apply to the filter. See the example below for a better understanding.
1655
   *                          </p>
1656 1
   *                          <p>
1657 1
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1658
   *                          input array are filtered by this filter.
1659
   *                          </p>
1660 1
   * @param bool  $add_empty  [optional] <p>
1661
   *                          Add missing keys as <b>NULL</b> to the return value.
1662 1
   *                          </p>
1663 1
   *
1664 1
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1665 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1666 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1667 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1668 1
   * fails.
1669 1
   * @since 5.2.0
1670 1
   */
1671 1 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // the optional arguments are only passed on, if the caller provided more than the $type
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    return self::filter($values);
  }
1681
1682
  /**
1683
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1684
   *
1685
   * Filters a variable with a specified filter
1686
   *
1687
   * @link  http://php.net/manual/en/function.filter-var.php
1688
   *
1689
   * @param mixed $variable <p>
1690
   *                        Value to filter.
1691
   *                        </p>
1692 1
   * @param int   $filter   [optional] <p>
1693 1
   *                        The ID of the filter to apply. The
1694
   *                        manual page lists the available filters.
1695
   *                        </p>
1696
   * @param mixed $options  [optional] <p>
1697
   *                        Associative array of options or bitwise disjunction of flags. If filter
1698
   *                        accepts options, flags can be provided in "flags" field of array. For
1699
   *                        the "callback" filter, callable type should be passed. The
1700
   *                        callback must accept one argument, the value to be filtered, and return
1701
   *                        the value after filtering/sanitizing it.
1702
   *                        </p>
1703
   *                        <p>
1704
   *                        <code>
1705
   *                        // for filters that accept options, use this format
1706
   *                        $options = array(
1707
   *                        'options' => array(
1708
   *                        'default' => 3, // value to return if the filter fails
1709
   *                        // other options here
1710
   *                        'min_range' => 0
1711
   *                        ),
1712
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1713
   *                        );
1714
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1715
   *                        // for filter that only accept flags, you can pass them directly
1716
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1717
   *                        // for filter that only accept flags, you can also pass as an array
1718
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1719
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1720
   *                        // callback validate filter
1721
   *                        function foo($value)
1722
   *                        {
1723
   *                        // Expected format: Surname, GivenNames
1724
   *                        if (strpos($value, ", ") === false) return false;
1725
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1726
   *                        $empty = (empty($surname) || empty($givennames));
1727
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1728
   *                        if ($empty || $notstrings) {
1729
   *                        return false;
1730
   *                        } else {
1731
   *                        return $value;
1732
   *                        }
1733
   *                        }
1734
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1735
   *                        </code>
1736
   *                        </p>
1737
   *
1738
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1739
   * @since 5.2.0
1740
   */
1741 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Only forward "$options" when the caller really passed a third argument,
    // so the native function never receives our "null" placeholder default.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // The native result is handed to "self::filter()" before it is returned.
    return self::filter($filtered);
  }
1751
1752 1
  /**
1753
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1754 1
   *
1755 1
   * Gets multiple variables and optionally filters them
1756
   *
1757 1
   * @link  http://php.net/manual/en/function.filter-var-array.php
1758
   *
1759
   * @param array $data       <p>
1760
   *                          An array with string keys containing the data to filter.
1761
   *                          </p>
1762
   * @param mixed $definition [optional] <p>
1763
   *                          An array defining the arguments. A valid key is a string
1764
   *                          containing a variable name and a valid value is either a
1765
   *                          filter type, or an
1766
   *                          array optionally specifying the filter, flags and options.
1767
   *                          If the value is an array, valid keys are filter
1768
   *                          which specifies the filter type,
1769
   *                          flags which specifies any flags that apply to the
1770
   *                          filter, and options which specifies any options that
1771
   *                          apply to the filter. See the example below for a better understanding.
1772 1
   *                          </p>
1773
   *                          <p>
1774 1
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1775
   *                          input array are filtered by this filter.
1776
   *                          </p>
1777
   * @param bool  $add_empty  [optional] <p>
1778
   *                          Add missing keys as <b>NULL</b> to the return value.
1779
   *                          </p>
1780
   *
1781
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1782
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1783
   * the variable is not set.
1784
   * @since 5.2.0
1785
   */
1786 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults are used; otherwise both
    // "$definition" and "$add_empty" are forwarded as given.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // The native result is handed to "self::filter()" before it is returned.
    return self::filter($filtered);
  }
1796 1
1797
  /**
1798
   * Check if the number of unicode characters is not more than the specified integer.
1799
   *
1800
   * @param string $str      The original string to be checked.
1801
   * @param int    $box_size The size in number of chars to be checked against string.
1802
   *
1803
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1804
   */
1805
  public static function fits_inside($str, $box_size)
  {
    // length as reported by "self::strlen()"
    $length = self::strlen($str);

    return ($length <= $box_size);
  }
1809
1810 1
  /**
1811
   * Try to fix simple broken UTF-8 strings.
1812 1
   *
1813
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1814
   *
1815
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
1816
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1817
   * See: http://en.wikipedia.org/wiki/Windows-1252
1818
   *
1819
   * @param string $str <p>The input string</p>
1820
   *
1821
   * @return string
1822
   */
1823 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, built from "self::$BROKEN_UTF8_FIX" on first use
    // and then kept in these static variables for later calls
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    // nothing to repair in an empty string
    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1842 2
1843
  /**
1844 2
   * Fix a double (or multiple) encoded UTF8 string.
1845
   *
1846
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1847
   *
1848
   * @return string|string[] <p>Will return the fixed input-"array" or
1849
   *                         the fixed input-"string".</p>
1850
   */
1851
  public static function fix_utf8($str)
  {
    // arrays are processed element by element (recursively), keys are kept
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // decode + re-encode until one full pass leaves the value unchanged
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;

      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1875
1876
  /**
1877
   * Get the text direction of a specific character.
1878
   *
1879
   * @param string $char
1880
   *
1881
   * @return string <p>'RTL' or 'LTR'</p>
1882
   */
1883
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      if (in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code falls through to the manual lookup below
    }

    $codePoint = static::chr_to_decimal($char);

    // everything outside of 0x5be - 0x10b7f is treated as left-to-right
    if (0x5be > $codePoint || 0x10b7f < $codePoint) {
      return 'LTR';
    }

    if (0x85e >= $codePoint) {

      // lower block: 0x5be - 0x85e
      $rtlSingles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $rtlRanges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $codePoint) {

      return 'RTL';

    } elseif (0xfb1d <= $codePoint) {

      // upper block: 0xfb1d - 0x10b7f
      $rtlSingles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $rtlRanges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two blocks only 0x200f is right-to-left
      return 'LTR';

    }

    // single code points are compared strictly ...
    if (in_array($codePoint, $rtlSingles, true)) {
      return 'RTL';
    }

    // ... ranges are inclusive on both ends
    foreach ($rtlRanges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1997
1998
  /**
1999
   * get data from "/data/*.php"
2000
   *
2001
   * @param string $file
2002
   *
2003
   * @return bool|string|array|int <p>Will return false on error.</p>
2004
   */
2005
  private static function getData($file)
  {
    // "$file" is overwritten on purpose: the required file runs in this
    // method's scope, so the set of visible variables stays unchanged.
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // the data file's own "return" value is passed through
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2015
2016
  /**
2017
   * Check for php-support.
2018
   *
2019
   * @param string|null $key
2020
   *
2021
   * @return mixed <p>Return the full support-"array", if $key === null<br />
2022
   *               return bool-value, if $key is used and available<br />
2023
   *               otherwise return null</p>
2024
   */
2025
  public static function getSupportInfo($key = null)
  {
    // run the support detection on first use
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key given: return the complete support-"array"
    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null-valued) keys yield null
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2041
2042
  /**
2043
   * alias for "UTF8::string_has_bom()"
2044
   *
2045
   * @see UTF8::string_has_bom()
2046
   *
2047
   * @param string $str
2048
   *
2049
   * @return bool
2050
   *
2051
   * @deprecated
2052
   */
2053
  public static function hasBom($str)
  {
    // deprecated alias: the work is done by "UTF8::string_has_bom()"
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2057
2058
  /**
2059
   * Converts a hexadecimal-value into an UTF-8 character.
2060
   *
2061
   * @param string $hexdec <p>The hexadecimal value.</p>
2062
   *
2063
   * @return string|false <p>One single UTF-8 character.</p>
2064
   */
2065
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> decimal code point -> character
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2069
2070
  /**
2071
   * Converts hexadecimal U+xxxx code point representation to integer.
2072
   *
2073
   * INFO: opposite to UTF8::int_to_hex()
2074
   *
2075
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
2076
   *
2077
   * @return int|false <p>The code point, or false on failure.</p>
2078
   */
2079
  public static function hex_to_int($hexdec)
  {
    $hexdec = (string)$hexdec;

    // an empty input can never be a code point
    if (!isset($hexdec[0])) {
      return false;
    }

    // Accepted notations: "\uXXXX", "U+XXXX" or the bare digits (4 - 6 of them).
    //
    // Only real hexadecimal digits (0-9, a-f) are allowed: otherwise inputs like
    // "zzzz" or "12zz" would pass the check and be silently truncated by
    // intval() (to 0 and 18) instead of being reported as a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2093
2094 2
  /**
2095
   * alias for "UTF8::html_entity_decode()"
2096 2
   *
2097 1
   * @see UTF8::html_entity_decode()
2098 1
   *
2099
   * @param string $str
2100 2
   * @param int    $flags
2101
   * @param string $encoding
2102 2
   *
2103 1
   * @return string
2104
   */
2105
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // plain alias: all arguments are passed on unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2109 2
2110 2
  /**
2111 1
   * Converts a UTF-8 string to a series of HTML numbered entities.
2112
   *
2113 1
   * INFO: opposite to UTF8::html_decode()
2114 1
   *
2115 1
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2116 1
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2117 1
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2118 2
   *
2119
   * @return string <p>HTML numbered entities.</p>
2120 2
   */
2121
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: 0x80 leaves the ASCII range untouched
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The conversion map is a list of 4-element groups
      // (start, end, offset, mask). It must contain exactly a multiple of four
      // values: PHP 8 throws a ValueError otherwise, older versions just
      // ignored the surplus element.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without "mbstring": encode character by character
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2159
2160
  /**
2161
   * UTF-8 version of html_entity_decode()
2162
   *
2163
   * The reason we are not using html_entity_decode() by itself is because
2164
   * while it is not technically correct to leave out the semicolon
2165
   * at the end of an entity most browsers will still interpret the entity
2166
   * correctly. html_entity_decode() does not convert entities without
2167
   * semicolons, so we are left with our own little solution here. Bummer.
2168
   *
2169
   * Convert all HTML entities to their applicable characters
2170
   *
2171
   * INFO: opposite to UTF8::html_encode()
2172
   *
2173
   * @link http://php.net/manual/en/function.html-entity-decode.php
2174
   *
2175
   * @param string $str      <p>
2176
   *                         The input string.
2177
   *                         </p>
2178
   * @param int    $flags    [optional] <p>
2179
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2180
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2181
   *                         <table>
2182
   *                         Available <i>flags</i> constants
2183
   *                         <tr valign="top">
2184
   *                         <td>Constant Name</td>
2185
   *                         <td>Description</td>
2186
   *                         </tr>
2187
   *                         <tr valign="top">
2188
   *                         <td><b>ENT_COMPAT</b></td>
2189
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2190
   *                         </tr>
2191
   *                         <tr valign="top">
2192
   *                         <td><b>ENT_QUOTES</b></td>
2193
   *                         <td>Will convert both double and single quotes.</td>
2194
   *                         </tr>
2195
   *                         <tr valign="top">
2196
   *                         <td><b>ENT_NOQUOTES</b></td>
2197
   *                         <td>Will leave both double and single quotes unconverted.</td>
2198
   *                         </tr>
2199
   *                         <tr valign="top">
2200
   *                         <td><b>ENT_HTML401</b></td>
2201
   *                         <td>
2202
   *                         Handle code as HTML 4.01.
2203
   *                         </td>
2204
   *                         </tr>
2205
   *                         <tr valign="top">
2206
   *                         <td><b>ENT_XML1</b></td>
2207
   *                         <td>
2208
   *                         Handle code as XML 1.
2209
   *                         </td>
2210
   *                         </tr>
2211
   *                         <tr valign="top">
2212
   *                         <td><b>ENT_XHTML</b></td>
2213
   *                         <td>
2214
   *                         Handle code as XHTML.
2215
   *                         </td>
2216
   *                         </tr>
2217
   *                         <tr valign="top">
2218
   *                         <td><b>ENT_HTML5</b></td>
2219
   *                         <td>
2220
   *                         Handle code as HTML 5.
2221
   *                         </td>
2222
   *                         </tr>
2223
   *                         </table>
2224
   *                         </p>
2225
   * @param string $encoding [optional] <p>Encoding to use.</p>
2226
   *
2227
   * @return string <p>The decoded string.</p>
2228
   */
2229
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // shorter than four bytes, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // without an ampersand there is nothing to decode
    if (strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity start nor any terminating semicolon
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // Converts one decimal entity via "mbstring"; entities that would turn into
    // a single or double quote are returned as they were matched.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a full pass does not change the string anymore
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // append the missing semicolon to numeric entities ...
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2294
2295
  /**
2296
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2297
   *
2298
   * @link http://php.net/manual/en/function.htmlentities.php
2299
   *
2300
   * @param string $str           <p>
2301
   *                              The input string.
2302
   *                              </p>
2303 2
   * @param int    $flags         [optional] <p>
2304
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2305 2
   *                              invalid code unit sequences and the used document type. The default is
2306
   *                              ENT_COMPAT | ENT_HTML401.
2307
   *                              <table>
2308
   *                              Available <i>flags</i> constants
2309
   *                              <tr valign="top">
2310
   *                              <td>Constant Name</td>
2311
   *                              <td>Description</td>
2312
   *                              </tr>
2313
   *                              <tr valign="top">
2314
   *                              <td><b>ENT_COMPAT</b></td>
2315
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2316
   *                              </tr>
2317 1
   *                              <tr valign="top">
2318
   *                              <td><b>ENT_QUOTES</b></td>
2319 1
   *                              <td>Will convert both double and single quotes.</td>
2320
   *                              </tr>
2321
   *                              <tr valign="top">
2322
   *                              <td><b>ENT_NOQUOTES</b></td>
2323
   *                              <td>Will leave both double and single quotes unconverted.</td>
2324
   *                              </tr>
2325
   *                              <tr valign="top">
2326
   *                              <td><b>ENT_IGNORE</b></td>
2327
   *                              <td>
2328
   *                              Silently discard invalid code unit sequences instead of returning
2329
   *                              an empty string. Using this flag is discouraged as it
2330
   *                              may have security implications.
2331
   *                              </td>
2332
   *                              </tr>
2333
   *                              <tr valign="top">
2334
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2335
   *                              <td>
2336
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2337
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2338
   *                              </td>
2339
   *                              </tr>
2340
   *                              <tr valign="top">
2341
   *                              <td><b>ENT_DISALLOWED</b></td>
2342
   *                              <td>
2343
   *                              Replace invalid code points for the given document type with a
2344
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2345
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2346
   *                              instance, to ensure the well-formedness of XML documents with
2347
   *                              embedded external content.
2348
   *                              </td>
2349
   *                              </tr>
2350
   *                              <tr valign="top">
2351
   *                              <td><b>ENT_HTML401</b></td>
2352
   *                              <td>
2353
   *                              Handle code as HTML 4.01.
2354
   *                              </td>
2355
   *                              </tr>
2356
   *                              <tr valign="top">
2357
   *                              <td><b>ENT_XML1</b></td>
2358
   *                              <td>
2359 1
   *                              Handle code as XML 1.
2360
   *                              </td>
2361 1
   *                              </tr>
2362
   *                              <tr valign="top">
2363
   *                              <td><b>ENT_XHTML</b></td>
2364
   *                              <td>
2365
   *                              Handle code as XHTML.
2366
   *                              </td>
2367
   *                              </tr>
2368
   *                              <tr valign="top">
2369
   *                              <td><b>ENT_HTML5</b></td>
2370
   *                              <td>
2371
   *                              Handle code as HTML 5.
2372
   *                              </td>
2373
   *                              </tr>
2374
   *                              </table>
2375
   *                              </p>
2376
   * @param string $encoding      [optional] <p>
2377
   *                              Like <b>htmlspecialchars</b>,
2378
   *                              <b>htmlentities</b> takes an optional third argument
2379
   *                              <i>encoding</i> which defines encoding used in
2380
   *                              conversion.
2381
   *                              Although this argument is technically optional, you are highly
2382
   *                              encouraged to specify the correct value for your code.
2383
   *                              </p>
2384
   * @param bool   $double_encode [optional] <p>
2385
   *                              When <i>double_encode</i> is turned off PHP will not
2386
   *                              encode existing html entities. The default is to convert everything.
2387 1
   *                              </p>
2388
   *
2389 1
   *
2390
   * @return string the encoded string.
2391
   * </p>
2392
   * <p>
2393
   * If the input <i>string</i> contains an invalid code unit
2394
   * sequence within the given <i>encoding</i> an empty string
2395
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2396
   * <b>ENT_SUBSTITUTE</b> flags are set.
2397
   */
2398
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // collect every distinct character of three or more bytes together with
    // its numbered entity
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $str);
  }
2426
2427
  /**
2428
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2429
   *
2430
   * INFO: Take a look at "UTF8::htmlentities()"
2431 28
   *
2432
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2433 28
   *
2434
   * @param string $str           <p>
2435 28
   *                              The string being converted.
2436 5
   *                              </p>
2437
   * @param int    $flags         [optional] <p>
2438
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2439 28
   *                              invalid code unit sequences and the used document type. The default is
2440
   *                              ENT_COMPAT | ENT_HTML401.
2441
   *                              <table>
2442
   *                              Available <i>flags</i> constants
2443
   *                              <tr valign="top">
2444
   *                              <td>Constant Name</td>
2445
   *                              <td>Description</td>
2446
   *                              </tr>
2447
   *                              <tr valign="top">
2448
   *                              <td><b>ENT_COMPAT</b></td>
2449 1
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2450
   *                              </tr>
2451 1
   *                              <tr valign="top">
2452
   *                              <td><b>ENT_QUOTES</b></td>
2453 1
   *                              <td>Will convert both double and single quotes.</td>
2454 1
   *                              </tr>
2455
   *                              <tr valign="top">
2456
   *                              <td><b>ENT_NOQUOTES</b></td>
2457 1
   *                              <td>Will leave both double and single quotes unconverted.</td>
2458 1
   *                              </tr>
2459
   *                              <tr valign="top">
2460 1
   *                              <td><b>ENT_IGNORE</b></td>
2461
   *                              <td>
2462
   *                              Silently discard invalid code unit sequences instead of returning
2463
   *                              an empty string. Using this flag is discouraged as it
2464
   *                              may have security implications.
2465
   *                              </td>
2466
   *                              </tr>
2467
   *                              <tr valign="top">
2468
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2469
   *                              <td>
2470
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2471 16
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2472
   *                              </td>
2473
   *                              </tr>
2474 16
   *                              <tr valign="top">
2475
   *                              <td><b>ENT_DISALLOWED</b></td>
2476
   *                              <td>
2477 16
   *                              Replace invalid code points for the given document type with a
2478
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2479 16
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2480 16
   *                              instance, to ensure the well-formedness of XML documents with
2481 15
   *                              embedded external content.
2482 16
   *                              </td>
2483 6
   *                              </tr>
2484
   *                              <tr valign="top">
2485 15
   *                              <td><b>ENT_HTML401</b></td>
2486
   *                              <td>
2487
   *                              Handle code as HTML 4.01.
2488
   *                              </td>
2489
   *                              </tr>
2490
   *                              <tr valign="top">
2491
   *                              <td><b>ENT_XML1</b></td>
2492
   *                              <td>
2493
   *                              Handle code as XML 1.
2494
   *                              </td>
2495
   *                              </tr>
2496
   *                              <tr valign="top">
2497
   *                              <td><b>ENT_XHTML</b></td>
2498
   *                              <td>
2499
   *                              Handle code as XHTML.
2500
   *                              </td>
2501
   *                              </tr>
2502
   *                              <tr valign="top">
2503
   *                              <td><b>ENT_HTML5</b></td>
2504
   *                              <td>
2505
   *                              Handle code as HTML 5.
2506
   *                              </td>
2507
   *                              </tr>
2508
   *                              </table>
2509
   *                              </p>
2510
   * @param string $encoding      [optional] <p>
2511
   *                              Defines encoding used in conversion.
2512
   *                              </p>
2513
   *                              <p>
2514
   *                              For the purposes of this function, the encodings
2515
   *                              ISO-8859-1, ISO-8859-15,
2516
   *                              UTF-8, cp866,
2517
   *                              cp1251, cp1252, and
2518
   *                              KOI8-R are effectively equivalent, provided the
2519
   *                              <i>string</i> itself is valid for the encoding, as
2520
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2521
   *                              the same positions in all of these encodings.
2522
   *                              </p>
2523
   * @param bool   $double_encode [optional] <p>
2524
   *                              When <i>double_encode</i> is turned off PHP will not
2525
   *                              encode existing html entities, the default is to convert everything.
2526
   *                              </p>
2527
   *
2528
   * @return string The converted string.
2529
   * </p>
2530
   * <p>
2531
   * If the input <i>string</i> contains an invalid code unit
2532
   * sequence within the given <i>encoding</i> an empty string
2533
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2534
   * <b>ENT_SUBSTITUTE</b> flags are set.
2535
   */
2536 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Anything other than the exact string 'UTF-8' goes through normalize_encoding() first.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2544
2545 1
  /**
   * Checks whether iconv is available on the server.
   *
   * On PHP versions below 5.6 this also sets the iconv input-, output- and
   * internal-encoding to UTF-8, but only if the extension is really loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only touch the iconv settings when the extension exists, otherwise
    // "iconv_set_encoding()" is an undefined function and the call is fatal.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2565 1
2566
  /**
   * Alias of "UTF8::decimal_to_chr()": the argument is passed through
   * unchanged and the result is returned as-is.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Value handed over to "UTF8::decimal_to_chr()".</p>
   *
   * @return string
   */
  public static function int_to_chr($int)
  {
    return self::decimal_to_chr($int);
  }
2579
2580
  /**
   * Converts an integer into its hexadecimal code point notation, e.g. "U+00f1".
   *
   * The hexadecimal part is left-padded with zeros to at least four digits.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hexadecimal digits.</p>
   *
   * @return string <p>The code point, or an empty string if $int is not an integer.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Real integers only: numeric strings, floats etc. are rejected.
    if ((int)$int !== $int) {
      return '';
    }

    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2602 4
2603 4
  /**
   * Checks whether intl-char is available on the server: requires the
   * PHP "7.0" version check to pass and the "IntlChar" class to exist.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2616 4
2617 4
  /**
   * Checks whether the "intl" extension is loaded on the server.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('intl');
  }
2626 4
2627 4
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *
   * @deprecated <p>please use "UTF8::is_ascii()"</p>
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2642
2643
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>please use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2658
2659 3
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>The input to check.</p>
   *
   * @return bool
   *
   * @deprecated <p>please use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2674 3
2675 3
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>please use "UTF8::is_bom()"</p>
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2690 1
2691 1
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>please use "UTF8::is_html()"</p>
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2706
2707
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>please use "UTF8::is_json()"</p>
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2722 1
2723
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>The unchanged result of "UTF8::is_utf16()":
   *                   <strong>false</strong> if it is not UTF-16,
   *                   <strong>1</strong> for UTF-16LE, <strong>2</strong> for UTF-16BE.</p>
   *
   * @deprecated <p>please use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2738
2739
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>The unchanged result of "UTF8::is_utf32()":
   *                   <strong>false</strong> if it is not UTF-32,
   *                   <strong>1</strong> for UTF-32LE, <strong>2</strong> for UTF-32BE.</p>
   *
   * @deprecated <p>please use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2754 36
2755 41
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Passed through to "UTF8::is_utf8()".</p>
   *
   * @return bool
   *
   * @deprecated <p>please use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2771 9
2772 9
  /**
   * Checks if a string is 7 bit ASCII, i.e. contains no byte in the range 0x80-0xFF.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII (an empty string counts as ASCII)<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return true;
    }

    // Any single high-bit byte disqualifies the string.
    return !preg_match('/[\x80-\xFF]/', $str);
  }
2792 3
2793
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input counts as base64 when it can be strictly decoded and
   * re-encoding the decoded bytes yields exactly the input again.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = base64_decode($str, true);

    // Compare against false explicitly: a plain truthiness test would wrongly
    // reject valid payloads that decode to the string "0" (e.g. "MA==").
    if ($decoded === false) {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2815
2816
  /**
   * Check if the input is binary (heuristic).
   *
   * The input is treated as binary when it consists only of the characters
   * "0" and "1", or when it contains at least one NUL byte.
   *
   * @param mixed $input <p>The value to check, it is cast to a string first.</p>
   *
   * @return bool <p><strong>false</strong> for an empty input.</p>
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    if (!isset($input[0])) {
      return false;
    }

    // a string of bits, e.g. "01010101"
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // A single NUL byte is enough. The former "more than 30% NUL bytes"
    // check could never change the result, so one scan does the same job.
    return strpos($input, "\x00") !== false;
  }
2846
2847
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to
   * "UTF8::is_binary()". A file that cannot be opened or read is handled
   * like an empty file.
   *
   * @param string $file <p>Path of the file to check.</p>
   *
   * @return bool
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure via "false" (plus a warning), not via an
      // exception, so the handle must be checked before it is used.
      if ($fp !== false) {
        $block = fread($fp, 512);
        fclose($fp);
        $fp = false;
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';

      // do not leak the handle if reading failed after a successful open
      if (is_resource($fp)) {
        fclose($fp);
      }
    }

    return self::is_binary($block);
  }
2866
2867
  /**
   * Checks if the given string is identical to one of the keys of the
   * internal "Byte Order Mark" list.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // strict comparison against every known BOM string (the array keys)
    return in_array($str, array_keys(self::$BOM), true);
  }
2886
2887 2
  /**
   * Check if the string contains anything that looks like an html-tag, e.g. "<lall>".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>false</strong> for an empty string or if no tag was found.</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $found = array();
    preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    // a filled match-array means that at least one tag was found
    return count($found) !== 0;
  }
2913
2914
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that "UTF8::json_decode()" turns into an object or an array,
   * without a JSON error being reported, is accepted.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // scalars and null are not accepted as json here
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    return json_last_error() === JSON_ERROR_NONE;
  }
2945
2946
  /**
   * Check if the string is UTF-16.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    return self::detect_utf_byte_order($str, 'UTF-16LE', 'UTF-16BE');
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    return self::detect_utf_byte_order($str, 'UTF-32LE', 'UTF-32BE');
  }

  /**
   * Shared implementation of "UTF8::is_utf16()" and "UTF8::is_utf32()".
   *
   * The BOM is removed first. Only input that "UTF8::is_binary()" reports as
   * binary is examined; it is then scored once per byte order and the byte
   * order with the higher score wins.
   *
   * @param string $str        <p>The input string.</p>
   * @param string $encodingLE <p>Little-endian encoding name, e.g. "UTF-16LE".</p>
   * @param string $encodingBE <p>Big-endian encoding name, e.g. "UTF-16BE".</p>
   *
   * @return int|false <p>
   *                   <strong>1</strong> if the little-endian score is higher,<br />
   *                   <strong>2</strong> if the big-endian score is higher,<br />
   *                   <strong>false</strong> if the input is not binary or both scores are equal.
   *                   </p>
   */
  private static function detect_utf_byte_order($str, $encodingLE, $encodingBE)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    $scoreLE = self::count_round_trip_chars($str, $encodingLE);
    $scoreBE = self::count_round_trip_chars($str, $encodingBE);

    // a tie gives no hint about the byte order
    if ($scoreLE === $scoreBE) {
      return false;
    }

    return $scoreLE > $scoreBE ? 1 : 2;
  }

  /**
   * Scores how plausible it is that "$str" is encoded in "$encoding".
   *
   * The string is converted to UTF-8, back to "$encoding" and to UTF-8 again.
   * If the conversion result is empty or the round trip is not stable the
   * score is 0. Otherwise the score is the number of keys of
   * "UTF8::count_chars()" for the converted text that are found (strict
   * comparison) in the result of "UTF8::count_chars()" for the raw input.
   *
   * @param string $str      <p>The input string (BOM already removed).</p>
   * @param string $encoding <p>The candidate encoding.</p>
   *
   * @return int
   */
  private static function count_round_trip_chars($str, $encoding)
  {
    $utf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$utf8) {
      return 0;
    }

    $reEncoded = \mb_convert_encoding($utf8, $encoding, 'UTF-8');
    $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
    if ($roundTrip !== $utf8) {
      return 0;
    }

    $score = 0;
    $strChars = self::count_chars($str, true);
    foreach (self::count_chars($roundTrip, true) as $char => $unused) {
      if (in_array($char, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
3065
3066
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * Overlong (non-shortest) forms, 5- and 6-octet sequences, surrogates,
   * code points above U+10FFFF and sequences that are cut off at the end of
   * the string are all treated as invalid.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p><strong>true</strong> for an empty string.</p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut is taken when "pcre_utf8_support()" is NOT
    // true although it relies on the "/u" modifier - confirm that the
    // condition is not inverted before changing it.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length, also when "strlen()" is overloaded by mbstring
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet
      // sequence
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      /**
       * End of the multi-octet sequence. mUcs4 now contains the final
       * Unicode code point to be output
       */
      if (0 === --$mState) {
        /*
        * Check for illegal sequences and code points.
        */
        // From Unicode 3.1, non-shortest form is illegal
        if (
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // initialize UTF8 cache
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means that the string ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3217
3218
  /**
3219
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3220
   * Decodes a JSON string
3221
   *
3222
   * @link http://php.net/manual/en/function.json-decode.php
3223
   *
3224
   * @param string $json    <p>
3225
   *                        The <i>json</i> string being decoded.
3226
   *                        </p>
3227
   *                        <p>
3228
   *                        This function only works with UTF-8 encoded strings.
3229
   *                        </p>
3230 17
   *                        <p>PHP implements a superset of
3231
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3232 17
   *                        only supports these values when they are nested inside an array or an object.
3233 3
   *                        </p>
3234
   * @param bool   $assoc   [optional] <p>
3235
   *                        When <b>TRUE</b>, returned objects will be converted into
3236 16
   *                        associative arrays.
3237
   *                        </p>
3238
   * @param int    $depth   [optional] <p>
3239
   *                        User specified recursion depth.
3240 16
   *                        </p>
3241
   * @param int    $options [optional] <p>
3242
   *                        Bitmask of JSON decode options. Currently only
3243
   *                        <b>JSON_BIGINT_AS_STRING</b>
3244
   *                        is supported (default is to cast large integers as floats)
3245
   *                        </p>
3246
   *
3247
   * @return mixed the value encoded in <i>json</i> in appropriate
3248 16
   * PHP type. Values true, false and
3249 16
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3250 15
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3251
   * <i>json</i> cannot be decoded or if the encoded
3252
   * data is deeper than the recursion limit.
3253 9
   */
3254 9 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = (string)self::filter($json);

    // The "$options" argument is only passed on when the PHP "5.4" version check succeeds.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3266 5
3267
  /**
3268
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3269 9
   * Returns the JSON representation of a value.
3270
   *
3271
   * @link http://php.net/manual/en/function.json-encode.php
3272
   *
3273
   * @param mixed $value   <p>
3274
   *                       The <i>value</i> being encoded. Can be any type except
3275
   *                       a resource.
3276
   *                       </p>
3277
   *                       <p>
3278
   *                       All string data must be UTF-8 encoded.
3279
   *                       </p>
3280
   *                       <p>PHP implements a superset of
3281
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3282
   *                       only supports these values when they are nested inside an array or an object.
3283
   *                       </p>
3284
   * @param int   $options [optional] <p>
3285 1
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3286
   *                       <b>JSON_HEX_TAG</b>,
3287
   *                       <b>JSON_HEX_AMP</b>,
3288 1
   *                       <b>JSON_HEX_APOS</b>,
3289
   *                       <b>JSON_NUMERIC_CHECK</b>,
3290 1
   *                       <b>JSON_PRETTY_PRINT</b>,
3291 1
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3292 1
   *                       <b>JSON_FORCE_OBJECT</b>,
3293
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3294
   *                       constants is described on
3295 1
   *                       the JSON constants page.
3296
   *                       </p>
3297
   * @param int   $depth   [optional] <p>
3298
   *                       Set the maximum depth. Must be greater than zero.
3299
   *                       </p>
3300
   *
3301
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3302
   */
3303 41 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // Pass the value through self::filter() before it reaches the native encoder.
    $filtered = self::filter($value);

    // The three-argument call (with "$depth") is only used when Bootup reports PHP 5.5.
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3315
3316
  /**
3317 1
   * Makes string's first char lowercase.
3318
   *
3319 1
   * @param string $str <p>The input string</p>
3320 1
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
3321
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3322
   *
3323 1
   * @return string <p>The resulting string</p>
3324 1
   */
3325 1 View Code Duplication
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Everything after the first character; a "false" result is treated as an empty remainder.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($tail === false) {
      $tail = '';
    }

    // The first character on its own, lower-cased with the same encoding settings.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3340 1
3341
  /**
3342
   * alias for "UTF8::lcfirst()"
3343 1
   *
3344
   * @see UTF8::lcfirst()
3345
   *
3346
   * @param string  $word
3347 1
   * @param string  $encoding
3348
   * @param boolean $cleanUtf8
3349 1
   *
3350 1
   * @return string
3351 1
   */
3352 1
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged to lcfirst().
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3356
3357
  /**
3358
   * Lowercase for all words in the string.
3359
   *
3360
   * @param string   $str        <p>The input string.</p>
3361
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3362
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
3363
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
3364
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
3365 5
   *
3366
   * @return string
3367 5
   */
3368 View Code Duplication
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Test for the empty string explicitly: a loose "!$str" check would also
    // swallow the input "0" and return '' for it.
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {
      // Only really empty chunks are skipped (they contribute nothing to the result).
      // A chunk such as "0" is falsy in PHP but is real content and must be kept.
      if ((string)$word === '') {
        continue;
      }

      // Words listed in $exceptions are copied over untouched.
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3406 1
3407
  /**
3408 1
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3409
   *
3410
   * @param string $str   <p>The string to be trimmed</p>
3411
   * @param string $chars <p>Optional characters to be stripped</p>
3412
   *
3413
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3414
   */
3415 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is a legitimate character to strip, but it is falsy in PHP. Normalize the
    // integer form and make sure neither is mistaken for "no characters given".
    if ($chars === 0) {
      $chars = '0';
    }
    $noCharsGiven = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharsGiven) {
      // Default: strip leading separator (\pZ) and control/other (\pC) characters.
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    // Custom list: self::rxClass() supplies the pattern fragment for the given characters.
    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3430
3431 45
  /**
3432 45
   * Returns the UTF-8 character with the maximum code point in the given data.
3433 45
   *
3434 45
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3435
   *
3436 45
   * @return string <p>The character with the highest code point.</p>
3437
   */
3438 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is glued into a single string before it is inspected.
    $subject = is_array($arg) === true ? implode('', $arg) : $arg;

    $codePoints = self::codepoints($subject);
    $highest = max($codePoints);

    return self::chr($highest);
  }
3446
3447
  /**
3448
   * Calculates and returns the maximum number of bytes taken by any
3449
   * UTF-8 encoded character in the given string.
3450
   *
3451
   * @param string $str <p>The original Unicode string.</p>
3452
   *
3453 45
   * @return int <p>Max byte lengths of the given chars.</p>
3454
   */
3455 45
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // An empty size list yields 0; otherwise the largest entry is returned as int.
    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3464
3465 45
  /**
3466
   * Checks whether mbstring is available on the server.
3467
   *
3468
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3469
   */
3470
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    // Side effect: when the extension is present its internal encoding is set to UTF-8.
    \mb_internal_encoding('UTF-8');

    return true;
  }
3480 23
3481 5
  /**
3482
   * Returns the UTF-8 character with the minimum code point in the given data.
3483
   *
3484
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3485 19
   *
3486 3
   * @return string <p>The character with the lowest code point.</p>
3487
   */
3488 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is glued into a single string before it is inspected.
    $subject = is_array($arg) === true ? implode('', $arg) : $arg;

    $codePoints = self::codepoints($subject);
    $lowest = min($codePoints);

    return self::chr($lowest);
  }
3496
3497
  /**
3498
   * alias for "UTF8::normalize_encoding()"
3499
   *
3500
   * @see UTF8::normalize_encoding()
3501
   *
3502 52
   * @param string $encoding
3503
   * @param mixed  $fallback
3504 52
   *
3505
   * @return string
3506 52
   *
3507
   * @deprecated
3508 52
   */
3509 40
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    // Deprecated camelCase alias: forwards both arguments to normalize_encoding().
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3513
3514
  /**
3515 18
   * Normalize the encoding-"name" input.
3516 17
   *
3517
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3518 17
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3519 17
   *
3520 17
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
3521 2
   */
3522 2
  public static function normalize_encoding($encoding, $fallback = false)
  {
    // Per-request memo of already normalized names, keyed by the name as it was passed in.
    static $cache = array();

    if (!$encoding) {
      return $fallback;
    }

    // Fast paths: the canonical "UTF-8" or a name listed in self::$ICONV_ENCODING is returned as-is.
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($cache[$encoding])) {
      return $cache[$encoding];
    }

    $normalized = strtoupper($encoding);

    // Lookup key: the upper-cased name with everything except letters, digits and whitespace removed.
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // Unknown names fall through with just the upper-casing applied.
    if (!empty($aliases[$lookupKey])) {
      $normalized = $aliases[$lookupKey];
    }

    $cache[$encoding] = $normalized;

    return $normalized;
  }
3571 1
3572
  /**
3573
   * Normalize some MS Word special characters.
3574 1
   *
3575
   * @param string $str <p>The string to be normalized.</p>
3576
   *
3577
   * @return string
3578
   */
3579 View Code Duplication
  public static function normalize_msword($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Split self::$UTF8_MSWORD into search/replace lists once and reuse them on later calls.
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$UTF8_MSWORD);
      $replace = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($search, $replace, $str);
  }
3598 36
3599
  /**
3600
   * Normalize the whitespace.
3601
   *
3602 36
   * @param string $str                     <p>The string to be normalized.</p>
3603
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3604 36
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3605 6
   *                                        bidirectional text chars.</p>
3606 6
   *
3607
   * @return string
3608 36
   */
3609 36
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // The cache key and the table construction must use the very same test. Previously the key
    // was "(int)$keepNonBreakingSpace" while the table was built with "=== true", so a truthy
    // non-boolean (e.g. 1) stored the full table in slot 1 and later calls with "true" then
    // wrongly replaced non-breaking spaces as well.
    $keepNbsp = $keepNonBreakingSpace === true;
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // Bidirectional control characters are removed entirely, not turned into spaces.
      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    // Every remaining whitespace variant from the table becomes a plain space.
    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3645
3646 36
  /**
3647 5
   * Strip all whitespace characters. This includes tabs and newline
3648
   * characters, as well as multibyte whitespace such as the thin space
3649 5
   * and ideographic space.
3650 5
   *
3651
   * @param string $str
3652
   *
3653 36
   * @return string
3654
   */
3655
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // Remove every run of characters in the [[:space:]] class (pattern compiled in "u" mode).
    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    return (string)$stripped;
  }
3666
3667
  /**
3668
   * Format a number with grouped thousands.
3669
   *
3670 12
   * @param float  $number
3671
   * @param int    $decimals
3672
   * @param string $dec_point
3673
   * @param string $thousands_sep
3674
   *
3675
   * @return string
3676 12
   *
3677 2
   * @deprecated Because this has nothing to do with UTF8. :/
3678 1
   */
3679 2
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // The native number_format() only honours multi-byte separators since PHP 5.4, so the
    // workaround is needed when EITHER separator is longer than one byte AND the runtime is
    // older than 5.4. (The previous condition was inverted: both separators multi-byte and
    // PHP >= 5.4.)
    // NOTE(review): assumes Bootup::is_php('5.4') means "running on PHP >= 5.4" - confirm.
    $needsWorkaround = (isset($thousands_sep[1]) || isset($dec_point[1]))
                       && Bootup::is_php('5.4') !== true;

    if ($needsWorkaround) {
      // strtr() with an array replaces both placeholders in a single pass. The former
      // sequential str_replace() re-replaced "," or "." inside an already inserted separator.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3705
3706
  /**
3707
   * Calculates Unicode code point of the given UTF-8 encoded character.
3708
   *
3709
   * INFO: opposite to UTF8::chr()
3710 6
   *
3711 6
   * @param string      $chr      <p>The character of which to calculate code point.</p>
3712 6
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
3713 6
   *
3714 6
   * @return int <p>
3715 6
   *             Unicode code point of the given character,<br />
3716 6
   *             0 on invalid UTF-8 byte sequence.
3717 6
   *             </p>
3718 6
   */
3719 6
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // Anything that does not normalize to UTF-8 is converted to UTF-8 first.
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer \IntlChar when available; a falsy answer falls through to the manual decoder.
    if (self::$SUPPORT['intlChar'] === true) {
      $viaIntl = \IntlChar::ord($chr);
      if ($viaIntl) {
        return $viaIntl;
      }
    }

    // Static memo of manually decoded characters, keyed by the (UTF-8) input.
    static $CHAR_CACHE = array();
    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // At most the first four bytes are looked at; unpack() numbers them starting at 1.
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing decodable): the first byte value itself, 0 for empty input
      $codePoint = $lead;
    }

    $CHAR_CACHE[$chr] = $codePoint;

    return $codePoint;
  }
3768
3769
  /**
3770
   * Parses the string into an array (into the second parameter).
3771
   *
3772
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3773
   *          if the second parameter is not set!
3774
   *
3775
   * @link http://php.net/manual/en/function.parse-str.php
3776
   *
3777
   * @param string  $str       <p>The input string.</p>
3778 14
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
3779
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3780 14
   *
3781
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3782
   */
3783 14
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // The parsed variables are written into the by-reference $result.
    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // Success means: the parser did not report failure AND something ended up in $result.
    return $parsed !== false && !empty($result);
  }
3797
3798
  /**
3799
   * Checks if \u modifier is available that enables Unicode support in PCRE.
3800
   *
3801
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3802
   */
3803
  public static function pcre_utf8_support()
  {
    // Probe with an empty pattern in "u" mode; errors are silenced and the result is cast to bool.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3808 1
3809
  /**
3810 1
   * Create an array containing a range of UTF-8 characters.
3811
   *
3812
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3813
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3814 1
   *
3815
   * @return array
3816 1
   */
3817
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // Both bounds are resolved by the same helper; a bound that resolves to 0 is rejected.
    $start = self::rangeBoundToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundToCodePoint($var2);
    if (!$end) {
      return array();
    }

    // Map every code point in the (inclusive) numeric range back to its character.
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolve one bound of UTF8::range() to a code point.
   *
   * Decimal digits are used as-is, other hexadecimal strings go through self::hex_to_int(),
   * anything else is treated as a character and passed to self::ord().
   *
   * @param mixed $bound <p>Numeric or hexadecimal code point, or a UTF-8 character.</p>
   *
   * @return int
   */
  private static function rangeBoundToCodePoint($bound)
  {
    // ctype_*() are only well-defined for strings (non-string arguments are deprecated
    // since PHP 8.1), so test the string form of the bound.
    $asString = (string)$bound;

    if (ctype_digit($asString)) {
      return (int)$bound;
    }

    if (ctype_xdigit($asString)) {
      return (int)self::hex_to_int($bound);
    }

    return self::ord($bound);
  }
3855
3856 2
  /**
3857 2
   * Multi decode html entity & fix urlencoded-win1252-chars.
3858
   *
3859 2
   * e.g:
3860
   * 'test+test'                     => 'test+test'
3861 2
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3862 2
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3863
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3864 2
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3865
   * 'Düsseldorf'                   => 'Düsseldorf'
3866
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3867 2
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3868 2
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3869 2
   *
3870 2
   * @param string $str          <p>The input string.</p>
3871 2
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3872
   *
3873 2
   * @return string
3874 2
   */
3875 2 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal HTML entities ("&#xXXXX;")
    // so that the entity decoding below can resolve them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    // ENT_HTML5 is only added when Bootup reports PHP 5.4.
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // One pass = to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8.
    // With $multi_decode the pass is repeated until the string no longer changes.
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3906
3907
  /**
3908
   * alias for "UTF8::remove_bom()"
3909
   *
3910
   * @see UTF8::remove_bom()
3911 1
   *
3912
   * @param string $str
3913 1
   *
3914
   * @return string
3915 1
   *
3916
   * @deprecated
3917
   */
3918
  public static function removeBOM($str)
  {
    // Deprecated camelCase alias: forwards the argument to remove_bom().
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3922
3923
  /**
3924
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
3925
   *
3926
   * @param string $str <p>The input string.</p>
3927
   *
3928
   * @return string <p>String without UTF-BOM</p>
3929
   */
3930
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // self::$BOM maps each BOM byte string to its length in bytes.
    foreach (self::$BOM as $bomString => $bomByteLength) {
      // Only a BOM at byte offset 0 counts.
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      // Cut the BOM off byte-wise; a "false" result means nothing is left.
      $rest = self::substr($str, $bomByteLength, null, '8BIT');
      $str = $rest === false ? '' : (string)$rest;
    }

    return $str;
  }
3950
3951
  /**
3952
   * Removes duplicate occurrences of a string in another string.
3953
   *
3954
   * @param string          $str  <p>The base string.</p>
3955
   * @param string|string[] $what <p>String to search for in the base string.</p>
3956
   *
3957
   * @return string <p>The result string with removed duplicates.</p>
3958
   */
3959 1
  public static function remove_duplicates($str, $what = ' ')
  {
    // Accept a single needle as well as a list of needles.
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // An empty needle cannot have duplicates; skip it instead of running a no-op regex.
        if ($item === '') {
          continue;
        }

        // Collapse every run of the needle into one occurrence. The replacement is the
        // captured group ("$1", which always equals the needle) rather than the raw needle,
        // because a raw needle such as "$0" or "\2" would be read as a backreference.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3974
3975
  /**
3976
   * Remove invisible characters from a string.
3977 1
   *
3978
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
3979 1
   *
3980
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
3981 1
   *
3982 1
   * @param string $str
3983 1
   * @param bool   $url_encoded
3984
   * @param string $replacement
3985 1
   *
3986 1
   * @return string
3987 1
   */
3988 1
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Percent-escapes may use upper- or lower-case hex digits, so match case-insensitively;
      // "%7f" is included to mirror the raw \x7F handled below.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass replaces nothing: removing one sequence can join the
    // surrounding text into a new matching sequence.
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4008 21
4009 1
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // "mb_substitute_character()" only accepts a code point or one of the
      // keywords "none" / "long" / "entity" -- never the replacement string itself.
      // For a non-empty replacement we therefore let mbstring emit U+FFFD for
      // every invalid sequence; the final str_replace() below then maps it to
      // the requested replacement (which may be multi-byte).
      $substitute = 'none';
      if ($replacementChar !== '') {
        $substitute = 0xFFFD;
      }

      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      // restore the previous global mbstring setting
      \mb_substitute_character($save);
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4059
4060
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (other / control) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // A plain "!$chars" check would also swallow '0' / 0, which are valid
    // characters to strip (e.g. trailing zeros), so they are excluded here.
    $useDefaultChars = (
        $chars === INF
        ||
        (!$chars && $chars !== '0' && $chars !== 0)
    );

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4083 1
4084
  /**
   * Build a regex fragment that matches any single character (grapheme) of "$s".
   *
   * Characters of up to two bytes are regex-quoted and collected in one bracket
   * class (prefixed by "$class"); a "-" is moved to the front of that class;
   * elements consisting of more than one character are added as alternatives.
   * Results are memoized per ($s, $class) pair.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Raw regex class content that is put in front of the generated class.</p>
   *
   * @return string <p>Either "[...]" or "(?:[...]|alt1|alt2)".</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    // The length prefix keeps the key unambiguous: a plain "$s . $class"
    // would make e.g. ('ab', '') and ('a', 'b') share one cache entry.
    $cacheKey = strlen((string)$s) . ':' . $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 holds the bracket-class content, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a leading "-" is always literal inside a character class
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes: may be a regex meta character -> quote it
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        // more than one character: cannot live inside a bracket class
        $parts[] = $char;
      }
    }

    // compare against '' explicitly, otherwise a class of just "0" is never wrapped
    if ('' !== (string)$parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    if (1 === count($parts)) {
      $return = $parts[0];
    } else {
      $return = '(?:' . implode('|', $parts) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4132
4133
  /**
   * WARNING: Prints every entry of the internal support table, e.g. for debugging.
   *
   * Each value is echoed followed by a newline and a "<br>" tag.
   */
  public static function showSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $output = '';
    foreach (self::$SUPPORT as $supportValue) {
      $output .= $supportValue . "\n<br>";
    }

    echo $output;
  }
4146
4147 1
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is passed through untouched when requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4179
4180
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a regex, otherwise the
   * bytes are walked manually and grouped into 1- to 4-byte sequences;
   * bytes that do not form a valid sequence are dropped in that fallback.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the string byte by byte

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // byte length, independent of a possible mbstring function overload
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteCount = \mb_strlen($str, '8BIT');
      } else {
        $byteCount = strlen($str);
      }

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {

        $lead = $str[$i];

        // 0xxxxxxx -> single byte (ASCII)
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          continue;
        }

        // 110xxxxx 10xxxxxx -> two bytes
        if (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];
            $i++;
          }
          continue;
        }

        // 1110xxxx 10xxxxxx 10xxxxxx -> three bytes
        if (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];
            $i += 2;
          }
          continue;
        }

        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> four bytes
        if (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];
            $i += 3;
          }
        }
      }
    }

    // group the single characters into chunks of "$length" characters
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4304
4305
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (binary strings only), ASCII,
   * UTF-8, "\mb_detect_encoding()" with a fixed candidate list and finally an
   * "iconv()" round-trip over the known iconv encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // each detector is evaluated only once per input (1 => LE, 2 => BE)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the string from that encoding
    // to itself leaves the bytes unchanged (compared via md5).

    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4405
4406 61
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always <strong>false</strong> if haystack or needle is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the last "length of needle" characters of the haystack
    $tail = self::substr($haystack, -self::strlen($needle));

    // a failed substr() yields false, which can never be identical to the string needle
    return $tail !== false && $tail === $needle;
  }
4434
4435
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always <strong>false</strong> if haystack or needle is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // substr() can fail with false; do not feed that into the string comparison
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4458
4459
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is turned into a quoted, case-insensitive UTF-8 regex; the
   * replacement is always inserted literally ("$0", "\1" etc. are NOT treated
   * as back-references). An empty search term never matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string, or an array of replacements
   *                       that is paired with the search array by position.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // pattern that can never match -> an empty needle replaces nothing
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() interprets "\" and "$" in the replacement as back-references,
    // so both are escaped to keep the replacement literal.
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $replaceValue) {
        $replacement[] = addcslashes((string)$replaceValue, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    $replacedCount = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
4502
4503 1
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always <strong>false</strong> if haystack or needle is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix exactly when its first case-insensitive match is at offset 0
    return self::stripos($haystack, $needle) === 0;
  }
4526
4527
  /**
   * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max number of characters; shorter strings are returned unchanged.</p>
   * @param string $strAddOn <p>Appended whenever the string was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly behind a space -> cut in front of that space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // drop the last (possibly cut) word
    $words = explode(' ', $truncated);
    $withoutLastWord = implode(' ', array_slice($words, 0, -1));

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // nothing is left in front of the last word -> hard cut instead
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4567 2
4568 14
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when "$pad_length" is not an integer, is not
   * positive, is smaller than the string length, or when "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // an empty pad string cannot pad anything (and would divide by zero below)
      if (!$ps_length) {
        return $str;
      }

      $diff = $pad_length - $str_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // an odd remainder goes to the right side
          $leftLength = (int)floor($diff / 2);
          $rightLength = $diff - $leftLength;

          $pre = str_repeat($pad_string, (int)ceil($leftLength / $ps_length));
          $pre = (string)self::substr($pre, 0, $leftLength);
          $post = str_repeat($pad_string, (int)ceil($rightLength / $ps_length));
          $post = (string)self::substr($post, 0, $rightLength);
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4622 1
4623 1
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);
    $repeated = str_repeat($filtered, $multiplier);

    return $repeated;
  }
4647 4
4648 2
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // delegates to the native function; "$count" is filled by reference
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4681 1
4682
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in; returned unchanged if the term is not found.</p>
   *
   * @return string
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstPos = self::strpos($subject, $search);

    if ($firstPos === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstPos, $searchLength);
  }
4701
4702
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // shuffle on character level, not on byte level
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4717
4718
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicate values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4743
4744 10
  /**
   * Split a string into an array of grapheme clusters.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>
   *                    Number of grapheme clusters per array element. Values below 1
   *                    are handed over to the native "str_split()".
   *                    </p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    // glue "$len" consecutive grapheme clusters together
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        // first grapheme of a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4788
4789
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always <strong>false</strong> if haystack or needle is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix exactly when its first match is at offset 0
    return self::strpos($haystack, $needle) === 0;
  }
4812
4813 10
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written
   * in base 2 without leading zeros ("0" for an empty or all-zero input).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string consisting of "0" and "1" only.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $value = unpack('H*', $str);
    $hex = (string)$value[1];

    // Convert digit by digit: "base_convert()" works on floats internally and
    // silently loses precision as soon as the string is longer than a few bytes.
    $bits = '';
    $hexLength = strlen($hex);
    for ($i = 0; $i < $hexLength; $i++) {
      $bits .= str_pad(decbin(hexdec($hex[$i])), 4, '0', STR_PAD_LEFT);
    }

    // same shape as the former "base_convert()" output: no leading zeros
    $bits = ltrim($bits, '0');

    if ($bits === '') {
      return '0';
    }

    return $bits;
  }
4828 8
4829
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the words kept as delimiters, so the unfiltered
   * result alternates between non-word parts and words.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charlist          <p>Additional chars that are treated as part of a word.</p>
   * @param bool     $removeEmptyValues <p>Remove parts that are empty after "trim()".</p>
   * @param null|int $removeShortValues <p>Remove parts with a length less than or equal to this value.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charlist = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    // letters plus the additional chars from "$charlist"
    $charlist = self::rxClass($charlist, '\pL');

    $parts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested -> hand back the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && trim($part) === '';
      if ($isBlank) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4891 1
4892
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>Passed through to UTF8::to_ascii().</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4907 11
4908
  /**
4909
   * Counts number of words in the UTF-8 string.
4910
   *
4911
   * @param string $str      <p>The input string.</p>
4912
   * @param int    $format   [optional] <p>
4913
   *                         <strong>0</strong> => return a number of words (default)<br />
4914
   *                         <strong>1</strong> => return an array of words<br />
4915
   *                         <strong>2</strong> => return an array of words with word-offset as key
4916
   *                         </p>
4917
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
4918
   *
4919
   * @return array|int <p>The number of words in the string</p>
4920
   */
4921 21
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // str_to_words() splits with a capturing delimiter, so the words sit on
    // the odd indexes and the text between them on the even indexes
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    // format 1: plain list of words
    if ($format === 1) {
      $words = array();
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    // format 2: words keyed by their character offset in the input
    if ($format === 2) {
      $words = array();
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[$position] = $parts[$idx];
        // advance by the word itself plus the text that follows it
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    // every other format: only the number of words
    return ($partCount - 1) / 2;
  }
4951
4952 3
  /**
4953
   * Case-insensitive string comparison.
4954
   *
4955
   * INFO: Case-insensitive version of UTF8::strcmp()
4956
   *
4957
   * @param string $str1
4958
   * @param string $str2
4959
   *
4960
   * @return int <p>
4961
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4962
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4963
   *             <strong>0</strong> if they are equal.
4964
   *             </p>
4965
   */
4966 16
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both operands first, then reuse the case-sensitive comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4970 16
4971 2
  /**
4972
   * alias for "UTF8::strstr()"
4973
   *
4974 15
   * @see UTF8::strstr()
4975
   *
4976
   * @param string  $haystack
4977
   * @param string  $needle
4978
   * @param bool    $before_needle
4979
   * @param string  $encoding
4980 15
   * @param boolean $cleanUtf8
4981 2
   *
4982 2
   * @return string|false
4983
   */
4984 15
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged to UTF8::strstr()
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4988
4989
  /**
4990
   * Case-sensitive string comparison.
4991
   *
4992
   * @param string $str1
4993
   * @param string $str2
4994
   *
4995
   * @return int  <p>
4996
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
4997
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
4998
   *              <strong>0</strong> if they are equal.
4999
   *              </p>
5000
   */
5001 1
  public static function strcmp($str1, $str2)
  {
    // identical string representations are equal, no normalization needed
    if (($str1 . '') === ($str2 . '')) {
      return 0;
    }

    // compare the NFD (canonical decomposition) forms byte-wise
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5009 1
5010 1
  /**
5011 1
   * Find length of initial segment not matching mask.
5012 1
   *
5013 1
   * @param string $str
5014
   * @param string $charList
5015 1
   * @param int    $offset
5016 1
   * @param int    $length
5017
   *
5018 1
   * @return int|null
5019
   */
5020
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    // without a mask there is nothing to look for
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // narrow the subject down to the requested slice first
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }

      $str = $slice;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return null;
    }

    // lazily capture everything in front of the first char from the mask
    $maskPattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    $maskFound = preg_match($maskPattern, $str, $match);

    // no char of the mask found: the whole string is the initial segment
    /** @noinspection OffsetOperationsInspection */
    return self::strlen($maskFound ? $match[1] : $str);
  }
5046
5047
  /**
5048
   * alias for "UTF8::stristr()"
5049
   *
5050
   * @see UTF8::stristr()
5051
   *
5052
   * @param string  $haystack
5053
   * @param string  $needle
5054
   * @param bool    $before_needle
5055
   * @param string  $encoding
5056
   * @param boolean $cleanUtf8
5057
   *
5058
   * @return string|false
5059 47
   */
5060
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged to UTF8::stristr()
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5064 47
5065 9
  /**
5066
   * Create a UTF-8 string from code points.
5067
   *
5068 45
   * INFO: opposite to UTF8::codepoints()
5069
   *
5070
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
5071
   *
5072 1
   * @return string <p>UTF-8 encoded string.</p>
5073 1
   */
5074
  public static function string(array $array)
  {
    // callable for UTF8::chr(), applied to every single code point
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    $chars = array_map($toChar, $array);

    // glue the converted characters together without a separator
    return implode('', $chars);
  }
5087 41
5088
  /**
5089
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
5090 43
   *
5091
   * @param string $str <p>The input string.</p>
5092
   *
5093
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
5094
   */
5095
  public static function string_has_bom($str)
  {
    // only the keys of self::$BOM are used as search strings here
    foreach (array_keys(self::$BOM) as $bom) {
      // position 0 means that the string starts with this BOM
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5105 43
5106
  /**
5107
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
5108
   *
5109
   * @link http://php.net/manual/en/function.strip-tags.php
5110
   *
5111
   * @param string  $str            <p>
5112
   *                                The input string.
5113
   *                                </p>
5114
   * @param string  $allowable_tags [optional] <p>
5115
   *                                You can use the optional second parameter to specify tags which should
5116
   *                                not be stripped.
5117
   *                                </p>
5118
   *                                <p>
5119
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
5120
   *                                can not be changed with allowable_tags.
5121
   *                                </p>
5122
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
5123
   *
5124
   * @return string <p>The stripped string.</p>
5125
   */
5126
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $input = (string)$str;

    // nothing to strip in an empty string
    if ($input === '') {
      return '';
    }

    // optionally clean the input via UTF8::clean() before the tags are stripped
    $subject = $cleanUtf8 ? self::clean($input) : $input;

    return strip_tags($subject, $allowable_tags);
  }
5140 1
5141
  /**
5142
   * Finds position of first occurrence of a string within another, case insensitive.
5143
   *
5144
   * @link http://php.net/manual/en/function.mb-stripos.php
5145
   *
5146
   * @param string  $haystack  <p>
5147
   *                           The string from which to get the position of the first occurrence
5148
   *                           of needle
5149
   *                           </p>
5150
   * @param string  $needle    <p>
5151
   *                           The string to find in haystack
5152
   *                           </p>
5153
   * @param int     $offset    [optional] <p>
5154
   *                           The position in haystack
5155
   *                           to start searching
5156
   *                           </p>
5157
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5158
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5159
   *
5160
   * @return int|false <p>
5161 1
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
5162
   *                   or false if needle is not found.
5163 1
   *                   </p>
5164 1
   */
5165
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // normalize the argument types
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or an empty needle can never match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isDefaultEncoding = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $isDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5209 6
5210 1
  /**
5211
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5212
   *
5213 1
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5214 1
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5215 1
   * @param bool    $before_needle [optional] <p>
5216 1
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
5217
   *                               haystack before the first occurrence of the needle (excluding the needle).
5218
   *                               </p>
5219
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
5220 1
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5221 1
   *
5222 1
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
5223 1
   */
5224 1 View Code Duplication
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // normalize the argument types
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    // an empty haystack or an empty needle can never match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'] === true;

    // only warn: the search still continues with the fallbacks below
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if ($hasMbstring) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: intl, INFO: "grapheme_stristr()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // last resort: lazily capture everything in front of the first case-insensitive match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $beforeMatch = $match[1];

    return $before_needle ? $beforeMatch : self::substr($haystack, self::strlen($beforeMatch));
  }
5283
5284 1
  /**
5285 1
   * Get the string length, not the byte-length!
5286
   *
5287
   * @link     http://php.net/manual/en/function.mb-strlen.php
5288 1
   *
5289 1
   * @param string  $str       <p>The string being checked for length.</p>
5290 1
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5291
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5292 1
   *
5293
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
5294
   *             character counted as +1)</p>
5295 1
   */
5296 1
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // an empty string has no characters
    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // the feature flags must be loaded before the first read of self::$SUPPORT,
    // otherwise the single-byte shortcut below reads an index that is not set yet
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // shortcut: these encodings are measured byte-wise
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        if (
            $encoding === 'CP850'
            &&
            self::$SUPPORT['mbstring_func_overload'] === false
        ) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // only warn: the fallbacks below are still tried
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    // 2nd choice: iconv (this also covers non-UTF-8 encodings without mbstring)
    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // 3rd choice: intl
    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single char" in UTF-8 mode
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5386
5387 13
  /**
5388
   * Case insensitive string comparisons using a "natural order" algorithm.
5389
   *
5390 13
   * INFO: natural order version of UTF8::strcasecmp()
5391
   *
5392 13
   * @param string $str1 <p>The first string.</p>
5393 3
   * @param string $str2 <p>The second string.</p>
5394
   *
5395
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
5396 11
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
5397
   *             <strong>0</strong> if they are equal
5398
   */
5399 11
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands first, then reuse the natural order comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5403 5
5404 1
  /**
5405
   * String comparisons using a "natural order" algorithm
5406
   *
5407
   * INFO: natural order version of UTF8::strcmp()
5408 1
   *
5409 1
   * @link  http://php.net/manual/en/function.strnatcmp.php
5410
   *
5411
   * @param string $str1 <p>The first string.</p>
5412 1
   * @param string $str2 <p>The second string.</p>
5413 1
   *
5414
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
5415
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
5416 1
   *             <strong>0</strong> if they are equal
5417
   */
5418
  public static function strnatcmp($str1, $str2)
  {
    // identical string representations are equal, no folding needed
    if (($str1 . '') === ($str2 . '')) {
      return 0;
    }

    // fold both operands via UTF8::strtonatfold() before the native comparison
    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5422 5
5423 5
  /**
5424
   * Case-insensitive string comparison of the first n characters.
5425 5
   *
5426
   * @link  http://php.net/manual/en/function.strncasecmp.php
5427 5
   *
5428 5
   * @param string $str1 <p>The first string.</p>
5429
   * @param string $str2 <p>The second string.</p>
5430
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5431 5
   *
5432
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5433
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5434 5
   *             <strong>0</strong> if they are equal
5435 5
   */
5436 5
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both operands first, then reuse the length-limited comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5440
5441 2
  /**
5442 2
   * String comparison of the first n characters.
5443 2
   *
5444
   * @link  http://php.net/manual/en/function.strncmp.php
5445 2
   *
5446 1
   * @param string $str1 <p>The first string.</p>
5447
   * @param string $str2 <p>The second string.</p>
5448 1
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5449 1
   *
5450 1
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5451
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5452 1
   *             <strong>0</strong> if they are equal
5453
   */
5454
  public static function strncmp($str1, $str2, $len)
  {
    // cut both operands down to their first $len characters,
    // the string cast turns a "false" from UTF8::substr() into ''
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
5461
5462
  /**
5463
   * Search a string for any of a set of characters.
5464
   *
5465
   * @link  http://php.net/manual/en/function.strpbrk.php
5466
   *
5467 1
   * @param string $haystack  <p>The string where char_list is looked for.</p>
5468 2
   * @param string $char_list <p>This parameter is case sensitive.</p>
5469
   *
5470 5
   * @return string|false String starting from the character found, or false if it is not found.
5471
   */
5472
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // an empty haystack or an empty char list can never match
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // find the first character of the haystack that is part of the char list
    $charPattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($charPattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $firstHitPos = strpos($haystack, $hit[0]);

    return substr($haystack, $firstHitPos);
  }
5487 1
5488
  /**
5489 1
   * Find position of first occurrence of string in a string.
5490
   *
5491 5
   * @link http://php.net/manual/en/function.mb-strpos.php
5492 5
   *
5493 5
   * @param string  $haystack  <p>The string being checked.</p>
5494 5
   * @param string  $needle    <p>The string to find in haystack.</p>
5495 1
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
5496
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5497 5
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5498
   *
5499 5
   * @return int|false <p>
5500
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
5501
   *                   If needle is not found it returns false.
5502
   *                   </p>
5503
   */
5504
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
5505
  {
5506
    $haystack = (string)$haystack;
5507
    $needle = (string)$needle;
5508
5509 2
    if (!isset($haystack[0], $needle[0])) {
5510
      return false;
5511 2
    }
5512
5513 1
    // init
5514
    $offset = (int)$offset;
5515
5516 1
    // iconv and mbstring do not support integer $needle
5517 1
5518
    if ((int)$needle === $needle && $needle >= 0) {
0 ignored issues
show
Unused Code Bug introduced by
The strict comparison === seems to always evaluate to false as the types of (int) $needle (integer) and $needle (string) can never be identical. Maybe you want to use a loose comparison == instead?
Loading history...
5519 1
      $needle = (string)self::chr($needle);
5520
    }
5521
5522 2
    if ($cleanUtf8 === true) {
5523
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
5524 2
      // if invalid characters are found in $haystack before $needle
5525 1
      $needle = self::clean($needle);
5526
      $haystack = self::clean($haystack);
5527
    }
5528 2
5529 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5530
        $encoding === 'UTF-8'
5531
        ||
5532
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
5533
    ) {
5534
      $encoding = 'UTF-8';
5535
    } else {
5536
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
5537
    }
5538
5539
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
5540 1
      self::checkForSupport();
5541
    }
5542 1
5543
    if (
5544
        $encoding === 'CP850'
5545
        &&
5546
        self::$SUPPORT['mbstring_func_overload'] === false
5547
    ) {
5548
      return strpos($haystack, $needle, $offset);
5549
    }
5550
5551 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5552
        $encoding !== 'UTF-8'
0 ignored issues
show
Comprehensibility introduced by
Consider adding parentheses for clarity. Current Interpretation: ($encoding !== 'UTF-8') ...PPORT['iconv'] === true, Probably Intended Meaning: $encoding !== ('UTF-8' &...PORT['iconv'] === true)

When comparing the result of a bit operation, we suggest to add explicit parenthesis and not to rely on PHP’s built-in operator precedence to ensure the code behaves as intended and to make it more readable.

Let’s take a look at these examples:

// Returns always int(0).
return 0 === $foo & 4;
return (0 === $foo) & 4;

// More likely intended return: true/false
return 0 === ($foo & 4);
Loading history...
5553
        &
5554
        self::$SUPPORT['iconv'] === true
5555
        &&
5556
        self::$SUPPORT['mbstring'] === false
5557
    ) {
5558
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
5559
    }
5560
5561
    if (
5562
        $offset >= 0 // iconv_strpos() can't handle negative offset
5563
        &&
5564
        $encoding !== 'UTF-8'
5565
        &&
5566
        self::$SUPPORT['mbstring'] === false
5567
        &&
5568 20
        self::$SUPPORT['iconv'] === true
5569
    ) {
5570 20
      // ignore invalid negative offset to keep compatibility
5571 2
      // with php < 5.5.35, < 5.6.21, < 7.0.6
0 ignored issues
show
Unused Code Comprehensibility introduced by
39% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
5572
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
5573
    }
5574 2
5575 2
    if (self::$SUPPORT['mbstring'] === true) {
5576
      return \mb_strpos($haystack, $needle, $offset, $encoding);
5577 2
    }
5578
5579
    if (
5580 20
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
5581
        &&
5582 20
        self::$SUPPORT['intl'] === true
5583 4
        &&
5584
        Bootup::is_php('5.4') === true
5585
    ) {
5586 19
      return \grapheme_strpos($haystack, $needle, $offset);
5587 19
    }
5588
5589
    if (
5590 19
        $offset >= 0 // iconv_strpos() can't handle negative offset
5591 19
        &&
5592
        self::$SUPPORT['iconv'] === true
5593 19
    ) {
5594 19
      // ignore invalid negative offset to keep compatibility
5595 19
      // with php < 5.5.35, < 5.6.21, < 7.0.6
0 ignored issues
show
Unused Code Comprehensibility introduced by
39% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
5596 19
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
5597
    }
5598 19
5599
    // fallback via vanilla php
5600 16
5601 16
    $haystackTmp = self::substr($haystack, $offset);
5602 16
    if ($haystackTmp === false) {
5603 16
      $haystackTmp = '';
5604 5
    }
5605 5
    $haystack = (string)$haystackTmp;
5606 5
5607
    if ($offset < 0) {
5608
      $offset = 0;
5609 19
    }
5610
5611 17
    $pos = strpos($haystack, $needle);
5612 13
    if ($pos === false) {
5613 13
      return false;
5614 13
    }
5615 8
5616 8
    $returnTmp = $offset + self::strlen(substr($haystack, 0, $pos));
5617 8
    if ($returnTmp !== false) {
5618
      return $returnTmp;
5619
    }
5620 19
5621
    // fallback to "mb_"-function via polyfill
5622 9
    return \mb_strpos($haystack, $needle, $offset, $encoding);
5623 4
  }
5624 4
5625 4
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return the part of haystack before the last occurrence of needle.<br />
   *                              <b>false</b>: return the part of haystack from the last occurrence of needle
   *                              to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name, anything other than "UTF-8" is passed through
   *                              normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Run needle and haystack through clean() before searching.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding name needs to be normalized
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // the search is done on cleaned input, so invalid bytes
      // cannot influence where the needle is found
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the real work is delegated to the "mb_"-function (native or polyfill)
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
5664 19
5665 19
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is cut into pieces by split() and the pieces are glued
   * back together in reverse order.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with its characters in reverse sequence, "" for empty input.</p>
   */
  public static function strrev($str)
  {
    $input = (string)$str;

    // nothing to reverse
    if ($input === '') {
      return '';
    }

    $chars = self::split($input);

    return implode('', array_reverse($chars));
  }
5682
5683
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               <b>true</b>: return the part of haystack before the last occurrence of needle.<br />
   *                               <b>false</b>: return the part of haystack from the last occurrence of needle
   *                               to the end.
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name, anything other than "UTF-8" is passed through
   *                               normalize_encoding() first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Run needle and haystack through clean() before searching.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding name needs to be normalized
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // the search is done on cleaned input, so invalid bytes
      // cannot influence where the needle is found
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5721
5722
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative int is converted via chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into a string via chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // normalize the argument types
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or needle can never match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions,
    //       where the fourth parameter was the "cleanUtf8"-flag
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a "bool"-encoding (old signature) is treated like the default
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // the remaining strategies below only know UTF-8
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // second choice: intl, INFO: "grapheme_strripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both strings and do a case-sensitive search
    $haystackUpper = self::strtoupper($haystack);
    $needleUpper = self::strtoupper($needle);

    return self::strrpos($haystackUpper, $needleUpper, $offset, $encoding, $cleanUtf8);
  }
5802
5803 1
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into a string via chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // normalize the argument types
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or needle can never match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions,
    //       where the fourth parameter was the "cleanUtf8"-flag
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a "bool"-encoding (old signature) is treated like the default
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // the remaining strategies below only know UTF-8
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // second choice: intl, INFO: "grapheme_strrpos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: cut the haystack according to the offset,
    // search byte-wise and translate the byte position into a character position

    if ($offset !== 0) {
      if ($offset > 0) {
        // skip the first $offset characters, they are added again to the result
        $slice = self::substr($haystack, $offset);
      } else {
        // drop the last |$offset| characters, positions still count from the start
        $slice = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      if ($slice !== null) {
        $haystack = $slice === false ? '' : (string)$slice;
      }
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // number of characters in front of the match
    $charsBefore = self::strlen(substr($haystack, 0, $bytePos));

    return $offset + $charsBefore;
  }
5905
5906
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars.</p>
   * @param int    $offset [optional] <p>Start of the examined part, passed to substr().</p>
   * @param int    $length [optional] <p>Length of the examined part, passed to substr().</p>
   *
   * @return int <p>Length (via strlen()) of the leading run of mask characters, 0 if there is none.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // only cut the string when the caller asked for a sub-range
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      $str = $part === false ? '' : (string)$part;
    }

    $str = (string)$str;

    // an empty string or an empty mask cannot produce a match
    if (!isset($str[0]) || !isset($mask[0])) {
      return 0;
    }

    // anchor a character class built from the mask at the start of the string
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5934
5935
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found
   *                      (or haystack / needle is empty).
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // search on cleaned input, so invalid bytes cannot influence the result
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // the remaining strategies below only know UTF-8
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // second choice: intl, INFO: "grapheme_strstr()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback: capture (non-greedy) everything in front of the first needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $head = $match[1];

    if ($before_needle) {
      return $head;
    }

    // everything starting at the needle
    return self::substr($haystack, self::strlen($head));
  }
6007
6008
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace via the "caseFolding_full" data (default)<br />
   *                           <b>false</b>, use only the static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded string, finally passed through strtolower().</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // per-request caches, filled on first use
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldData = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    // step 1: the small, built-in folding table
    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // step 2: the full folding table from the data-file
    if ($full) {
      if ($fullFoldData === null) {
        $fullFoldData = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldData[0], $fullFoldData[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6059 1
6060
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // convert only cleaned input
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // without a language there is nothing special to do
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // language specific rules are only available via the transliterator from intl
    if (
        self::$SUPPORT['intl'] !== true
        ||
        Bootup::is_php('5.4') !== true
    ) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the generic transliterator
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6117
6118
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of characters matching
   * "\p{Mn}" is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6130
6131
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // convert only cleaned input
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // language specific rules are only available via the transliterator from intl
      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning must name this method, not strtolower()
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6187
6188
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>
   *                              The string replacing from.<br />
   *                              If $to is omitted: an array is used as replace-pairs for the native strtr(),
   *                              a string is removed from $str.
   *                              </p>
   * @param string|string[] $to   <p>
   *                              The string being translated to.<br />
   *                              Strings are split via str_split(), arrays are used as they are.
   *                              </p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // nothing would change
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // only strings need to be split into their characters,
      // arrays are already lists of search / replace values
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs the same number of elements on both sides
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // build the replace-pairs: search => replacement
      $from = array_combine($from, $to);
    }

    // $from is only still a string when $to was omitted: remove it from the input
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6235 8
6236
  /**
   * Return the width of a string, as reported by "mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "UTF-8" is used as it is, every other name is normalized first
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // the input is only cleaned on request
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // "mb_"-function, possibly provided via polyfill
    return \mb_strwidth($input, $charset);
  }
6260 1
6261
  /**
   * Changes the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p>Either <strong>CASE_UPPER</strong><br />
   *                     or <strong>CASE_LOWER</strong> (default); every other value is handled like
   *                     <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (is_array($array) === false) {
      return false;
    }

    // only an exact CASE_LOWER lower-cases the keys, anything else upper-cases them
    $lowerKeys = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $key => $value) {
      $newKey = $lowerKeys ? self::strtolower($key) : self::strtoupper($key);

      // keys that become equal after the conversion overwrite each other, the last one wins
      $result[$newKey] = $value;
    }

    return $result;
  }
6298
6299
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>An empty <i>str</i> results in an empty string; if
   *                      <i>offset</i> is greater than the number of characters in <i>str</i>,
   *                      <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the extensions used below are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // the character count is only needed to check the offset or to replace a missing length
    $str_length = ($offset || $length === null) ? (int)self::strlen($str, $encoding) : 0;

    if ($offset && $offset > $str_length) {
      return false;
    }

    $length = ($length === null) ? $str_length : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut via the native function, as long as mbstring does not overload it
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // 2nd choice: intl, but "grapheme_substr()" is only used for UTF-8 here
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_substr($str, $offset, $length);
    }

    // 3rd choice: iconv, but "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    // last resort, vanilla php: split into characters, cut out the relevant part and join it again
    return implode('', array_slice(self::split($str), $offset, $length));
  }
6404
6405
  /**
   * Comparison of two strings from an offset, up to length characters.
   *
   * <p>If an offset or a length is given, <i>str1</i> is cut down via "UTF8::substr()" and <i>str2</i> is
   * cut down to the same number of characters before both are compared.</p>
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison.</p>
   * @param int     $length             [optional] <p>The length of the comparison.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $needsCut = ($offset !== 0 || $length !== null);

    if ($needsCut) {
      // a failed "substr" is handled like an empty string
      $part1 = self::substr($str1, $offset, $length);
      $str1 = ($part1 === false) ? '' : (string)$part1;

      // the second string is limited to the number of characters left in the first one
      $part2 = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($part2 === false) ? '' : (string)$part2;
    }

    return ($case_insensitivity === true) ? self::strcasecmp($str1, $str2) : self::strcmp($str1, $str2);
  }
6449
6450
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If omitted while an offset is given, the number of characters
   *                           of the haystack is used.
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty (or,
   *                   before PHP 7.1, if offset and length are both non-zero and do not add up to a
   *                   positive number).</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // count with the same encoding that is handed over to "self::substr()" below
        $length = (int)self::strlen($haystack, $encoding);
      }

      $offset = (int)$offset;
      $length = (int)$length;

      if (
          (
            $length !== 0
            &&
            $offset !== 0
          )
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // only the requested part of the haystack is searched, a failed "substr" leaves nothing to search in
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // both strings are cleaned, so that invalid bytes can't influence the result
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6539
6540
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with
   *                the needle.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove: empty needle, or the haystack starts with something else
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // skip as many characters as the needle has, a failed "substr" counts as an empty string
    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6572
6573
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with
   *                the needle.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove: empty needle, or the haystack ends with something else
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix, a failed "substr" counts as an empty string
    $keep = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keep);

    return ($head === false) ? '' : (string)$head;
  }
6605
6606
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with
   *                the needle.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove: empty needle, or the haystack starts with something else
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // skip as many characters as the needle has, a failed "substr" counts as an empty string
    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6638
6639
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If length is zero then this function will have the
   *                                          effect of inserting replacement into string at the given start offset.
   *                                          <br /><br />
   *                                          If it is not given and <i>$str</i> is a string, the replacing ends at
   *                                          the end of the string; if it is not given and <i>$str</i> is an array,
   *                                          a length of 0 is used for every element.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // an empty list stays empty; without this guard the scalar arguments below are still padded to
      // one element and "array_map()" would call the callback once with a missing (null) string
      if ($num === 0) {
        return array();
      }

      // the replacement: one entry per input string
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // the offset: non-integer entries are replaced by 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length: missing entries become 0, non-integer entries become the number of input strings
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call, once per input string
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single input string only uses the first replacement
    if (is_array($replacement) === true) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into characters, so that offset and length count characters instead of bytes
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6735
6736
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with
   *                the needle.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove: empty needle, or the haystack ends with something else
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix, a failed "substr" counts as an empty string
    $keep = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keep);

    return ($head === false) ? '' : (string)$head;
  }
6767
6768
  /**
   * Returns a case swapped version of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // every non-whitespace character is swapped on its own
    return preg_replace_callback(
        '/[\S]/u',
        function ($hit) use ($encoding) {
          $char = $hit[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // unchanged by the upper-casing: hand back the lower-case form instead
          return ($char === $upper) ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6811
6812
  /**
   * Deprecated alias, forwards all arguments to "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Handed over as 2nd argument.</p>
   * @param bool   $strict    [optional] <p>Handed over as 3rd argument.</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6829
6830
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6845
6846
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6861
6862
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_utf8()".
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6877
6878
  /**
   * Convert a string into ASCII.
   *
   * <p>The string is cleaned first; if it is not pure ASCII afterwards, every non-ASCII character is looked
   * up in a transliteration table (loaded via "self::getData()") and replaced by its table entry, or by
   * <i>$unknown</i> if there is none.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>The character that is used for unknown characters. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache for the transliteration tables, indexed by the "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $str = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    // split into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // decode the code point from the lead byte and its continuation bytes; the value is reset for
      // every character, so that nothing from a previous character can be reused by accident
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // no valid lead byte (128-191, 254 or 255): nothing could be decoded
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the table for this bank on first use, a missing table is remembered as an empty one
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // position of the character within its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
7033
7034
  /**
   * Convert UTF-8 input into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are walked element by element (recursively, keys are kept); every other
   * value is cast to string and handed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] <p>The converted value, in the same shape as the input.</p>
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) !== true) {
      $str = (string)$str;

      return $str === '' ? '' : self::utf8_decode($str);
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::to_iso8859($value);
    }

    return $str;
  }
7063
7064
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged and the
   * result is returned unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
7077
7078
  /**
   * Leave UTF-8-looking byte sequences alone and convert every other high byte to UTF-8.
   *
   * <ul>
   * <li>A lead byte (0xC0 - 0xF7) followed by the matching number of continuation bytes (0x80 - 0xBF)
   * is copied as it is.</li>
   * <li>Any other byte >= 0x80 is read as WINDOWS-1252 (for the bytes listed in the internal table)
   * or ISO-8859-1 and re-encoded as UTF-8.</li>
   * <li>Unicode escape sequences ("\u00e4") are decoded; two escapes that form a UTF-16 surrogate pair
   * ("\ud83d\ude00") are decoded together into one character.</li>
   * <li>WARNING: Only the shape of a sequence is checked, invalid UTF-8 characters are not removed, so
   * you maybe need to use "UTF8::clean()" for this case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array (arrays are converted
   *                                                recursively, keys are kept).</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "mbstring.func_overload" is active
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $max = \mb_strlen($str, '8BIT');
    } else {
      $max = strlen($str);
    }

    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {

      $c1 = $str[$i];

      if ($c1 >= "\xC0") { // possible lead byte of a multi-byte sequence

        // number of continuation bytes a sequence with this lead byte has
        if ($c1 <= "\xDF") {
          $tailLength = 1;
        } elseif ($c1 <= "\xEF") {
          $tailLength = 2;
        } elseif ($c1 <= "\xF7") {
          $tailLength = 3;
        } else {
          $tailLength = 0; // 0xF8 - 0xFF: doesn't look like UTF8, but should be converted
        }

        $sequence = $c1;
        $looksLikeUtf8 = $tailLength > 0;

        for ($n = 1; $n <= $tailLength; $n++) {
          // a sequence that is cut off by the end of the string is handled like an invalid one
          $cN = $i + $n >= $max ? "\x00" : $str[$i + $n];

          if (!($cN >= "\x80" && $cN <= "\xBF")) {
            $looksLikeUtf8 = false;
            break;
          }

          $sequence .= $cN;
        }

        if ($looksLikeUtf8 === true) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $tailLength;
          continue;
        }

      } elseif (($c1 & "\xC0") === "\x80") { // stray byte 0x80 - 0xBF, needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$WIN1252_TO_UTF8[$ordC1];
          continue;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
        continue;
      }

      // not valid UTF8 - convert the single byte into its 2-byte UTF-8 form
      $cc1 = self::chr_and_parse_int(ord($c1) / 64) | "\xC0";
      $cc2 = ($c1 & "\x3F") | "\x80";
      $buf .= $cc1 . $cc2;
    }

    // decode unicode escape sequences; a high + low surrogate pair is matched first,
    // so that both halves are decoded together instead of one by one
    $buf = preg_replace_callback(
        '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // group 3 only exists, if the single-escape alternative matched
          if (isset($match[3])) {
            return \mb_convert_encoding(pack('H*', $match[3]), 'UTF-8', 'UCS-2BE');
          }

          return \mb_convert_encoding(pack('H*', $match[1] . $match[2]), 'UTF-8', 'UTF-16BE');
        },
        $buf
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7215
7216
  /**
   * Strip whitespace (or other characters) from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function would only be safe for pure 7-bit (ASCII) input, but checking
   * for ASCII first costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; if omitted (or empty), separator
   *                      and control characters (\pZ, \pC) are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $useDefaultCharset = ($chars === INF || !$chars);

    if ($useDefaultCharset === false) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7244
7245
  /**
   * Uppercase the first character of a string and leave the rest untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character ("false" means: there is no rest)
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . ($rest === false ? '' : $rest);
  }
7269
7270
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7285
7286
  /**
   * Uppercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every chunk that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()" and the chunks are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if (!$str) {
      // the string "0" is falsy in PHP, but it is not empty: hand it back instead of ''
      return $str === '0' ? '0' : '';
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      if (!$word) {
        // drop empty chunks only; a chunk "0" is real content (and has no uppercase form)
        if ($word === '0') {
          $newWords[] = $word;
        }

        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7335
7336
  /**
   * Url-decode a string, decode its html-entities and repair simple UTF-8 / win1252 mix-ups,
   * by default repeated until the string does not change anymore.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // non-standard "%uXXXX" escapes are rewritten into numeric html-entities,
    // the entity decoding in the loop below resolves them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', urldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $strBefore = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entityDecoded));

      // one pass only, unless multi-decoding is requested and this pass changed something
    } while ($multi_decode === true && $strBefore !== $str);

    return (string)$str;
  }
7386
7387
  /**
7388
   * Returns an array that maps "urlencoded"-win1252 sequences (e.g. "%FC") to their UTF-8 characters.
7389
   *
7390
   * @deprecated use the "UTF8::urldecode()" function to decode a string
7391
   *
7392
   * @return array
7393
   */
7394
  public static function urldecode_fix_win1252_chars()
7395
  {
7396
    return array(
7397
        '%20' => ' ',
7398
        '%21' => '!',
7399
        '%22' => '"',
7400
        '%23' => '#',
7401
        '%24' => '$',
7402
        '%25' => '%',
7403
        '%26' => '&',
7404
        '%27' => "'",
7405
        '%28' => '(',
7406
        '%29' => ')',
7407
        '%2A' => '*',
7408
        '%2B' => '+',
7409
        '%2C' => ',',
7410
        '%2D' => '-',
7411
        '%2E' => '.',
7412
        '%2F' => '/',
7413
        '%30' => '0',
7414
        '%31' => '1',
7415
        '%32' => '2',
7416
        '%33' => '3',
7417
        '%34' => '4',
7418
        '%35' => '5',
7419
        '%36' => '6',
7420
        '%37' => '7',
7421
        '%38' => '8',
7422
        '%39' => '9',
7423
        '%3A' => ':',
7424
        '%3B' => ';',
7425
        '%3C' => '<',
7426
        '%3D' => '=',
7427
        '%3E' => '>',
7428
        '%3F' => '?',
7429
        '%40' => '@',
7430
        '%41' => 'A',
7431
        '%42' => 'B',
7432
        '%43' => 'C',
7433
        '%44' => 'D',
7434
        '%45' => 'E',
7435
        '%46' => 'F',
7436
        '%47' => 'G',
7437
        '%48' => 'H',
7438
        '%49' => 'I',
7439
        '%4A' => 'J',
7440
        '%4B' => 'K',
7441
        '%4C' => 'L',
7442
        '%4D' => 'M',
7443
        '%4E' => 'N',
7444
        '%4F' => 'O',
7445
        '%50' => 'P',
7446
        '%51' => 'Q',
7447
        '%52' => 'R',
7448
        '%53' => 'S',
7449
        '%54' => 'T',
7450
        '%55' => 'U',
7451
        '%56' => 'V',
7452
        '%57' => 'W',
7453
        '%58' => 'X',
7454
        '%59' => 'Y',
7455
        '%5A' => 'Z',
7456
        '%5B' => '[',
7457
        '%5C' => '\\',
7458
        '%5D' => ']',
7459
        '%5E' => '^',
7460
        '%5F' => '_',
7461
        '%60' => '`',
7462
        '%61' => 'a',
7463
        '%62' => 'b',
7464
        '%63' => 'c',
7465
        '%64' => 'd',
7466
        '%65' => 'e',
7467
        '%66' => 'f',
7468
        '%67' => 'g',
7469
        '%68' => 'h',
7470
        '%69' => 'i',
7471
        '%6A' => 'j',
7472
        '%6B' => 'k',
7473
        '%6C' => 'l',
7474
        '%6D' => 'm',
7475
        '%6E' => 'n',
7476
        '%6F' => 'o',
7477
        '%70' => 'p',
7478
        '%71' => 'q',
7479
        '%72' => 'r',
7480
        '%73' => 's',
7481
        '%74' => 't',
7482
        '%75' => 'u',
7483
        '%76' => 'v',
7484
        '%77' => 'w',
7485
        '%78' => 'x',
7486
        '%79' => 'y',
7487
        '%7A' => 'z',
7488
        '%7B' => '{',
7489
        '%7C' => '|',
7490
        '%7D' => '}',
7491
        '%7E' => '~',
7492
        '%7F' => '',
7493
        '%80' => '`',
7494
        '%81' => '',
7495
        '%82' => '‚',
7496
        '%83' => 'ƒ',
7497
        '%84' => '„',
7498
        '%85' => '…',
7499
        '%86' => '†',
7500
        '%87' => '‡',
7501
        '%88' => 'ˆ',
7502
        '%89' => '‰',
7503
        '%8A' => 'Š',
7504
        '%8B' => '‹',
7505
        '%8C' => 'Œ',
7506
        '%8D' => '',
7507
        '%8E' => 'Ž',
7508
        '%8F' => '',
7509
        '%90' => '',
7510
        '%91' => '‘',
7511
        '%92' => '’',
7512
        '%93' => '“',
7513
        '%94' => '”',
7514
        '%95' => '•',
7515
        '%96' => '–',
7516
        '%97' => '—',
7517
        '%98' => '˜',
7518
        '%99' => '™',
7519
        '%9A' => 'š',
7520
        '%9B' => '›',
7521
        '%9C' => 'œ',
7522
        '%9D' => '',
7523
        '%9E' => 'ž',
7524
        '%9F' => 'Ÿ',
7525
        '%A0' => '',
7526
        '%A1' => '¡',
7527
        '%A2' => '¢',
7528
        '%A3' => '£',
7529
        '%A4' => '¤',
7530
        '%A5' => '¥',
7531
        '%A6' => '¦',
7532
        '%A7' => '§',
7533
        '%A8' => '¨',
7534
        '%A9' => '©',
7535
        '%AA' => 'ª',
7536
        '%AB' => '«',
7537
        '%AC' => '¬',
7538
        '%AD' => '',
7539
        '%AE' => '®',
7540
        '%AF' => '¯',
7541
        '%B0' => '°',
7542
        '%B1' => '±',
7543
        '%B2' => '²',
7544
        '%B3' => '³',
7545
        '%B4' => '´',
7546
        '%B5' => 'µ',
7547
        '%B6' => '¶',
7548
        '%B7' => '·',
7549
        '%B8' => '¸',
7550
        '%B9' => '¹',
7551
        '%BA' => 'º',
7552
        '%BB' => '»',
7553
        '%BC' => '¼',
7554
        '%BD' => '½',
7555
        '%BE' => '¾',
7556
        '%BF' => '¿',
7557
        '%C0' => 'À',
7558
        '%C1' => 'Á',
7559
        '%C2' => 'Â',
7560
        '%C3' => 'Ã',
7561
        '%C4' => 'Ä',
7562
        '%C5' => 'Å',
7563
        '%C6' => 'Æ',
7564
        '%C7' => 'Ç',
7565
        '%C8' => 'È',
7566
        '%C9' => 'É',
7567
        '%CA' => 'Ê',
7568
        '%CB' => 'Ë',
7569
        '%CC' => 'Ì',
7570
        '%CD' => 'Í',
7571
        '%CE' => 'Î',
7572
        '%CF' => 'Ï',
7573
        '%D0' => 'Ð',
7574
        '%D1' => 'Ñ',
7575
        '%D2' => 'Ò',
7576
        '%D3' => 'Ó',
7577
        '%D4' => 'Ô',
7578
        '%D5' => 'Õ',
7579
        '%D6' => 'Ö',
7580
        '%D7' => '×',
7581
        '%D8' => 'Ø',
7582
        '%D9' => 'Ù',
7583
        '%DA' => 'Ú',
7584
        '%DB' => 'Û',
7585
        '%DC' => 'Ü',
7586
        '%DD' => 'Ý',
7587
        '%DE' => 'Þ',
7588
        '%DF' => 'ß',
7589
        '%E0' => 'à',
7590
        '%E1' => 'á',
7591
        '%E2' => 'â',
7592
        '%E3' => 'ã',
7593
        '%E4' => 'ä',
7594
        '%E5' => 'å',
7595
        '%E6' => 'æ',
7596
        '%E7' => 'ç',
7597
        '%E8' => 'è',
7598
        '%E9' => 'é',
7599
        '%EA' => 'ê',
7600
        '%EB' => 'ë',
7601
        '%EC' => 'ì',
7602
        '%ED' => 'í',
7603
        '%EE' => 'î',
7604
        '%EF' => 'ï',
7605
        '%F0' => 'ð',
7606
        '%F1' => 'ñ',
7607
        '%F2' => 'ò',
7608
        '%F3' => 'ó',
7609
        '%F4' => 'ô',
7610
        '%F5' => 'õ',
7611
        '%F6' => 'ö',
7612
        '%F7' => '÷',
7613
        '%F8' => 'ø',
7614
        '%F9' => 'ù',
7615
        '%FA' => 'ú',
7616
        '%FB' => 'û',
7617
        '%FC' => 'ü',
7618
        '%FD' => 'ý',
7619
        '%FE' => 'þ',
7620
        '%FF' => 'ÿ',
7621
    );
7622
  }
7623
7624
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is normalized via "UTF8::to_utf8()" first, then the sequences from the internal
   * UTF-8 -> WINDOWS-1252 table are replaced. After that the string is compacted in place:
   * 2-byte sequences become one byte (or "?" if the code point is >= 256), 3- and 4-byte
   * sequences become "?", all other bytes are copied.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // keys / values of the replacement table, extracted on the first call only
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // length in bytes, even if "mbstring.func_overload" is active
    $len = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    // $i = read position, $j = write position ($j never overtakes $i)
    $i = 0;
    $j = 0;
    while ($i < $len) {
      $highNibble = $str[$i] & "\xF0";

      if ($highNibble === "\xC0" || $highNibble === "\xD0") {
        // 2-byte sequence: 5 payload bits from the lead byte + 6 bits from the next byte
        $leadBits = ord($str[$i] & "\x1F");
        ++$i;
        $c = ($leadBits << 6) | ord($str[$i] & "\x3F");
        $str[$j] = $c < 256 ? self::chr_and_parse_int($c) : '?';
      } elseif ($highNibble === "\xE0" || $highNibble === "\xF0") {
        // 3- or 4-byte sequence: not representable, skip its continuation bytes
        $str[$j] = '?';
        $i += ($highNibble === "\xF0") ? 3 : 2;
      } else {
        $str[$j] = $str[$i];
      }

      ++$i;
      ++$j;
    }

    // cut off the leftover bytes behind the write position
    return (string)self::substr($str, 0, $j, '8BIT');
  }
7686
7687
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The native "\utf8_encode()" does the main work; if its result contains the byte 0xC2,
   * the sequences from the internal CP1252 -> UTF-8 table are replaced afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // keys / values of the replacement table, extracted on first use only
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);
    if ($encoded === false) {
      return '';
    }

    $str = (string)$encoded;

    // without a 0xC2 byte there is nothing the table could replace
    if (strpos($str, "\xC2") === false) {
      return $str;
    }

    if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $str);
  }
7723
7724
  /**
   * Alias of "UTF8::fix_simple_utf8()": the string is forwarded unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7737
7738
  /**
   * Returns the internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7754
7755
  /**
   * Cut a string after a given number of words.
   *
   * If the string has more words than allowed, the kept part is right-trimmed and $strAddOn
   * is appended; otherwise the string is returned as it is.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer (values below 1 give an empty string).</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $words = (int)$words;

    if ($words <= 0) {
      return '';
    }

    // optional leading whitespace, then 1 to $words runs of "non-whitespace + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);

    if ($wasShortened === false) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
7790
7791
  /**
   * Wraps a UTF-8 string to a given number of characters.
   *
   * The string is mirrored into an ASCII mask with one byte per character ("#" for an existing
   * break, " " for a space, "?" for anything else). The native "wordwrap()" wraps that mask and
   * the positions of its "#" marks are then mapped back onto the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $lineIndex => $line) {

      // every line but the first one was preceded by a break in the input
      if ($lineIndex !== 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $strReturn = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {

      // copy the characters in front of the break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $strReturn .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the break replaces an existing break / the space it was wrapped at
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $strReturn .= $break;
    }

    // whatever is left belongs to the last line
    return $strReturn . implode('', $chars);
  }
7858
7859
  /**
   * Returns the internal list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7868
7869
}
7870